Example #1
def init_dataframe(args, input_file, sub_type):
    """Initialize dataframe with data from input file; convert temperature and pressure to SI units"""
    check_na = -9999

    df, columns = common.load_dataframe(sub_type, input_file, 0)
    df.replace(check_na, np.nan, inplace=True)

    if sub_type == 'imau/ant':
        temperature_vars = ['temp_cnr1', 'ta',
                            'tsn1a', 'tsn2a', 'tsn3a', 'tsn4a', 'tsn5a',
                            'tsn1b', 'tsn2b', 'tsn3b', 'tsn4b', 'tsn5b',
                            'temp_logger']
        pressure_vars = ['pa']

    elif sub_type == 'imau/grl':
        temperature_vars = ['temp_cnr1', 'ta2', 'ta6',
                            'tsn1', 'tsn2', 'tsn3', 'tsn4', 'tsn5',
                            'datalogger']
        pressure_vars = ['pa']

    if not args.celsius:
        df.loc[:, temperature_vars] += common.freezing_point_temp  # Convert units to Kelvin

    if not args.mb:
        df.loc[:, pressure_vars] *= common.pascal_per_millibar  # Convert units to Pascal

    df = df.where((pd.notnull(df)), common.get_fillvalue(args))

    return df, temperature_vars, pressure_vars
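
The conversion factors used above live in a shared common module that is not part of these examples. A minimal sketch of what the snippet assumes is shown below; the physical constants are standard values, while get_fillvalue is only a hypothetical stand-in for the real helper:

# Assumed contents of the shared common module (not shown in these examples)
freezing_point_temp = 273.15   # K; added to degrees Celsius to obtain Kelvin
pascal_per_millibar = 100.0    # 1 millibar (= 1 hPa) equals 100 Pa
seconds_in_hour = 3600

def get_fillvalue(args):
    """Hypothetical stand-in: fill value used to mark missing data."""
    return getattr(args, 'fill_value', -999.0)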
Example #2
def scar2nc(args, input_file, output_file):
    """Main function to convert SCAR txt file to netCDF"""
    df, temperature_vars, pressure_vars, station_name, latitude, longitude, height, country, institution = init_dataframe(
        args, input_file)
    ds = xr.Dataset.from_dataframe(df)
    ds = ds.drop('time')

    common.log(args, 2, 'Calculating time and sza')
    time, time_bounds, sza, day_of_year = get_time_and_sza(
        args, df, latitude, longitude)

    ds['day_of_year'] = 'time', day_of_year

    ds['time'] = 'time', time
    ds['time_bounds'] = ('time', 'nbnd'), time_bounds
    ds['sza'] = 'time', sza
    ds['station_name'] = tuple(), station_name
    ds['latitude'] = tuple(), latitude
    ds['longitude'] = tuple(), longitude
    ds['height'] = tuple(), height

    comp_level = args.dfl_lvl

    common.load_dataset_attributes('scar',
                                   ds,
                                   args,
                                   country=country,
                                   institution=institution,
                                   temperature_vars=temperature_vars,
                                   pressure_vars=pressure_vars)
    encoding = common.get_encoding('scar', common.get_fillvalue(args),
                                   comp_level, args)

    common.write_data(args, ds, output_file, encoding)
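
Assignments such as ds['latitude'] = tuple(), latitude rely on xarray's (dims, data) tuple syntax: an empty dims tuple produces a dimensionless (scalar) variable, while 'time' attaches a 1-D array to the time dimension. A small self-contained illustration with made-up values:

import numpy as np
import xarray as xr

ds = xr.Dataset()
ds['sza'] = 'time', np.array([75.2, 76.1, 77.0])  # 1-D variable on the 'time' dimension
ds['latitude'] = tuple(), -71.95                  # 0-D (scalar) variable
ds['station_name'] = tuple(), 'AWS 4'             # scalar string variable

print(ds['sza'].dims)       # ('time',)
print(ds['latitude'].dims)  # ()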
Example #3
def init_dataframe(args, input_file):
    """Initialize dataframe with data from input file; convert current, temperature and pressure to SI units"""
    convert_current = 1000
    check_na = -999

    with open(input_file) as stream:
        for line in stream:
            input_file_vars = [x.strip() for x in line.split(' ') if x]
            break

    df, columns = common.load_dataframe('promice',
                                        input_file,
                                        1,
                                        input_file_vars=input_file_vars)
    df.replace(check_na, np.nan, inplace=True)

    temperature_vars = [
        'ta', 'ta_hygroclip', 'ts', 'tice1', 'tice2', 'tice3', 'tice4',
        'tice5', 'tice6', 'tice7', 'tice8', 'temp_logger'
    ]
    if not args.celsius:
        df.loc[:, temperature_vars] += common.freezing_point_temp  # Convert units to Kelvin

    pressure_vars = ['pa']
    if not args.mb:
        df.loc[:, pressure_vars] *= common.pascal_per_millibar  # Convert units to Pascal

    df.loc[:, ['fan_current']] /= convert_current  # Convert units to Ampere

    df = df.where((pd.notnull(df)), common.get_fillvalue(args))

    return df, temperature_vars, pressure_vars
Example #4
def init_dataframe(args, input_file):
    """Initialize dataframe with data from input file; convert temperature and pressure to SI units"""
    header_rows = 4

    df, columns = common.load_dataframe('nsidc', input_file, header_rows)

    # Replace missing values with NaN
    df['wspd'].replace(999, np.nan, inplace=True)
    df['visby'].replace(999999, np.nan, inplace=True)
    df['ta'].replace(9999, np.nan, inplace=True)
    df['dpt'].replace(9999, np.nan, inplace=True)
    df['slp'].replace(99999, np.nan, inplace=True)

    factor10_vars = [
        'wspd', 'ta', 'dpt', 'slp'
    ]  # Input data has scaling factor of 10 for these variables
    df.loc[:, factor10_vars] /= 10  # Divide by 10 to get original values

    temperature_vars = ['ta', 'dpt']
    if not args.celsius:
        df.loc[:, temperature_vars] += common.freezing_point_temp  # Convert units to Kelvin

    pressure_vars = ['slp']
    # Pressure already in hPa
    # if not args.mb:
    #    df.loc[:, pressure_vars] *= common.pascal_per_millibar  # Convert units to Pascal

    df = df.where((pd.notnull(df)), common.get_fillvalue(args))

    return df, temperature_vars, pressure_vars
Example #5
def init_dataframe(args, input_file):
    """Initialize dataframe with data from input file; convert temperature and speed to SI units"""
    knot_to_ms = 0.514444
    header_rows = 0
    with open(input_file) as stream:
        for line in stream:
            header_rows += 1
            if len(line.strip()) == 0:
                break

    count = 0
    with open(input_file) as stream:
        for line in stream:
            if count == 0:
                stn_name = line.strip()
            if count == 1:
                country = line[12:].strip()
            if count == 2:
                parts = line.split(' ')
                lat = float(parts[1])
                lon = float(parts[3])
                height = float(parts[5].strip()[:-1])
            if count == 3:
                input_file_vars = [
                    x.split('(')[0].strip() for x in line[16:].split(',')
                ]
            if count == 4:
                check_na = int(line.strip().split(' ')[-1])
            if count == 5:
                # str.lstrip strips characters, not a prefix, so remove a leading "the " explicitly
                institution = line[16:].strip()
                if institution.lower().startswith('the '):
                    institution = institution[4:]

            count += 1

            if count == 6:
                break

    df, columns = common.load_dataframe('scar',
                                        input_file,
                                        header_rows,
                                        input_file_vars=input_file_vars)
    df.replace(check_na, np.nan, inplace=True)

    temperature_vars = ['ta']
    if not args.celsius:
        df.loc[:, temperature_vars] += common.freezing_point_temp  # Convert units to Kelvin

    pressure_vars = ['pa']
    if not args.mb:
        df.loc[:, pressure_vars] *= common.pascal_per_millibar  # Convert units to Pascal

    df.loc[:, 'wspd'] *= knot_to_ms  # Convert units to meter per second

    df = df.where((pd.notnull(df)), common.get_fillvalue(args))

    return df, temperature_vars, pressure_vars, stn_name, lat, lon, height, country, institution
Example #6
def gcnet2nc(args, input_file, output_file, stations):
    """Main function to convert GCNet ascii file to netCDF"""
    df, temperature_vars, pressure_vars = init_dataframe(args, input_file)
    station_number = df['station_number'][0]
    df.drop('station_number', axis=1, inplace=True)

    ds = xr.Dataset.from_dataframe(df)
    ds = ds.drop('time')

    # surface_temp = extrapolate_temp(df)

    common.log(args, 2, 'Retrieving latitude, longitude and station name')
    latitude, longitude, station_name = get_station(args, input_file, stations)

    common.log(args, 3, 'Calculating time and sza')
    month, day, hour, minutes, time, time_bounds, sza, az, first_date, last_date = get_time_and_sza(
        args, df, longitude, latitude)

    common.log(args, 4, 'Calculating quality control variables')
    fill_dataset_quality_control(df, ds, input_file)

    if args.flx:
        common.log(args, 5, 'Calculating Sensible and Latent Heat Fluxes')
        sh, lh = gradient_fluxes(df)
        ds['sh'] = 'time', sh
        ds['lh'] = 'time', lh

    if args.no_drv_tm:
        pass
    else:
        ds['month'] = 'time', month
        ds['day'] = 'time', day
        ds['hour'] = 'time', hour
        ds['minutes'] = 'time', minutes

    ds['time'] = 'time', time
    ds['time_bounds'] = ('time', 'nbnd'), time_bounds
    ds['sza'] = 'time', sza
    ds['az'] = 'time', az
    ds['station_number'] = tuple(), station_number
    ds['station_name'] = tuple(), station_name
    ds['latitude'] = tuple(), latitude
    ds['longitude'] = tuple(), longitude
    # ds['surface_temp'] = 'time', surface_temp

    rigb_vars = []
    if args.rigb:
        ds, rigb_vars = common.call_rigb(
            args, station_name, first_date, last_date, ds, latitude, longitude, rigb_vars)

    comp_level = args.dfl_lvl

    common.load_dataset_attributes('gcnet', ds, args, rigb_vars=rigb_vars, temperature_vars=temperature_vars,
                                   pressure_vars=pressure_vars)
    encoding = common.get_encoding('gcnet', common.get_fillvalue(args), comp_level, args)

    common.write_data(args, ds, output_file, encoding)
Example #7
def imau2nc(args, input_file, output_file, stations):
    """Main function to convert IMAU ascii file to netCDF"""
    with open(input_file) as stream:
        line = stream.readline()
        var_count = len(line.split(','))

    errmsg = 'Unknown sub-type of IMAU network. Antarctic stations have 31 columns while Greenland stations have 35. ' \
             'Your dataset has {} columns.'.format(var_count)
    if var_count == 31:
        sub_type = 'imau/ant'
    elif var_count == 35:
        sub_type = 'imau/grl'
    else:
        raise RuntimeError(errmsg)

    df, temperature_vars, pressure_vars = init_dataframe(args, input_file, sub_type)
    ds = xr.Dataset.from_dataframe(df)
    ds = ds.drop('time')

    common.log(args, 2, 'Retrieving latitude, longitude and station name')
    latitude, longitude, station_name = get_station(args, input_file, stations)

    common.log(args, 3, 'Calculating time and sza')
    month, day, hour, minutes, time, time_bounds, sza, az, first_date, last_date = get_time_and_sza(
        args, df, longitude, latitude, sub_type)

    ds['month'] = 'time', month
    ds['day'] = 'time', day
    ds['hour'] = 'time', hour
    ds['minutes'] = 'time', minutes
    ds['time'] = 'time', time
    ds['time_bounds'] = ('time', 'nbnd'), time_bounds
    ds['sza'] = 'time', sza
    ds['az'] = 'time', az
    ds['station_name'] = tuple(), station_name
    ds['latitude'] = tuple(), latitude
    ds['longitude'] = tuple(), longitude

    rigb_vars = []
    if args.rigb:
        ds, rigb_vars = common.call_rigb(
            args, station_name, first_date, last_date, ds, latitude, longitude, rigb_vars)

    comp_level = args.dfl_lvl

    common.load_dataset_attributes(sub_type, ds, args, rigb_vars=rigb_vars, temperature_vars=temperature_vars,
                                   pressure_vars=pressure_vars)
    encoding = common.get_encoding(sub_type, common.get_fillvalue(args), comp_level, args)

    common.write_data(args, ds, output_file, encoding)
Example #8
def convert_coordinates(args, dataframe):
    """Convert latitude_GPS and longitude_GPS units from ddmm to degrees"""
    fillvalue = common.get_fillvalue(args)
    # Exclude NAs
    df1 = dataframe[dataframe.latitude_GPS != fillvalue]
    df2 = dataframe[dataframe.longitude_GPS != fillvalue]

    def lat_lon_gps(coords):
        deg = np.floor(coords / 100)
        minutes = np.floor(((coords / 100) - deg) * 100)
        seconds = (((coords / 100) - deg) * 100 - minutes) * 100
        return deg + minutes / 60 + seconds / 3600

    dataframe.latitude_GPS = lat_lon_gps(df1.latitude_GPS)
    dataframe.longitude_GPS = lat_lon_gps(df2.longitude_GPS)
Example #9
def promice2nc(args, input_file, output_file, stations):
    """Main function to convert PROMICE txt file to netCDF"""
    df, temperature_vars, pressure_vars = init_dataframe(args, input_file)
    ds = xr.Dataset.from_dataframe(df)
    ds = ds.drop('time')

    common.log(args, 2, 'Retrieving latitude, longitude and station name')
    latitude, longitude, station_name = get_station(args, input_file, stations)

    common.log(args, 3, 'Calculating time and sza')
    time, time_bounds, sza, az, first_date, last_date = get_time_and_sza(
        args, df, longitude, latitude)

    common.log(args, 4, 'Converting lat_GPS and lon_GPS')
    convert_coordinates(args, df)

    common.log(args, 5, 'Calculating ice velocity')
    fill_ice_velocity(args, df, ds)

    ds['time'] = 'time', time
    ds['time_bounds'] = ('time', 'nbnd'), time_bounds
    ds['sza'] = 'time', sza
    ds['az'] = 'time', az
    ds['station_name'] = tuple(), station_name
    ds['latitude'] = tuple(), latitude
    ds['longitude'] = tuple(), longitude

    rigb_vars = []
    if args.rigb:
        ds, rigb_vars = common.call_rigb(args, station_name, first_date,
                                         last_date, ds, latitude, longitude,
                                         rigb_vars)

    comp_level = args.dfl_lvl

    common.load_dataset_attributes('promice',
                                   ds,
                                   args,
                                   rigb_vars=rigb_vars,
                                   temperature_vars=temperature_vars,
                                   pressure_vars=pressure_vars)
    encoding = common.get_encoding('promice', common.get_fillvalue(args),
                                   comp_level, args)

    common.write_data(args, ds, output_file, encoding)
Example #10
def init_dataframe(args, input_file):
    """Initialize dataframe with data from input file; convert temperature and pressure to SI units"""
    check_na = 999.0

    global header_rows
    header_rows = 0
    with open(input_file) as stream:
        for line in stream:
            header_rows += 1
            if len(line.strip()) == 0:
                break

    df, columns = common.load_dataframe('gcnet', input_file, header_rows)

    # Cast qc25 to str, if the column is present, so genuine 999 QC codes
    # are not replaced with NaN below
    try:
        df['qc25'] = df['qc25'].astype(str)
    except Exception:
        pass

    df.replace(check_na, np.nan, inplace=True)

    temperature_vars = [
        'ta_tc1', 'ta_tc2', 'ta_cs1', 'ta_cs2',
        'tsn1', 'tsn2', 'tsn3', 'tsn4', 'tsn5',
        'tsn6', 'tsn7', 'tsn8', 'tsn9', 'tsn10',
        'ta_max1', 'ta_max2', 'ta_min1', 'ta_min2', 'ref_temp']
    if not args.celsius:
        df.loc[:, temperature_vars] += common.freezing_point_temp  # Convert units to Kelvin

    pressure_vars = ['ps']
    if not args.mb:
        df.loc[:, pressure_vars] *= common.pascal_per_millibar  # Convert units to Pascal

    df = df.where((pd.notnull(df)), common.get_fillvalue(args))

    try:
        df['qc25'] = df['qc25'].astype(int)  # Convert it back to int
    except Exception:
        pass

    return df, temperature_vars, pressure_vars
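
The string round-trip for qc25 above is needed because df.replace(check_na, np.nan) matches values in every column, so a genuine 999 quality-control code would otherwise be turned into a missing value. A small illustration of the effect, with made-up column names:

import numpy as np
import pandas as pd

df = pd.DataFrame({'ta': [999.0, -12.3], 'qc': [999, 111]})
print(df.replace(999.0, np.nan))  # 999 is treated as missing in both columns

df['qc'] = df['qc'].astype(str)   # shield the QC codes as strings
print(df.replace(999.0, np.nan))  # now only 'ta' is affected
df['qc'] = df['qc'].astype(int)   # cast back once the replacement is done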
Example #11
def get_ice_velocity(args, dataframe, delta_x, delta_y):
    """Calculate GPS-derived ice velocity using Haversine formula"""
    num_rows = dataframe['year'].size
    R = 6373.0  # Approximate radius of Earth in km
    fillvalue = common.get_fillvalue(args)

    velocity = []
    for idx in range(num_rows - 1):
        if any(i == fillvalue for i in (dataframe['latitude_GPS'][idx],
                                        dataframe['longitude_GPS'][idx],
                                        dataframe['latitude_GPS'][delta_x],
                                        dataframe['longitude_GPS'][delta_y])):
            velocity.append(fillvalue)
        else:
            lat1 = np.radians(dataframe['latitude_GPS'][idx])
            lon1 = np.radians(dataframe['longitude_GPS'][idx])
            lat2 = np.radians(dataframe['latitude_GPS'][delta_x])
            lon2 = np.radians(dataframe['longitude_GPS'][delta_y])

            dlat = lat2 - lat1
            dlon = lon2 - lon1

            a = (np.sin(dlat / 2)**2 +
                 np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2)**2)
            c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))

            # Multiplied by 1000 to convert km to meters
            distance = (R * c) * 1000
            # Divided by 3600 because time change
            # between 2 records is one hour
            velocity.append(round(distance / common.seconds_in_hour, 4))

        delta_x += 1
        delta_y += 1

    velocity.append(0)
    return velocity
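
The loop above applies the Haversine formula to consecutive GPS fixes. For reference, a standalone version of the same great-circle distance calculation, using the same 6373 km Earth radius:

import numpy as np

def haversine_m(lat1, lon1, lat2, lon2, radius_km=6373.0):
    """Great-circle distance in meters between two (lat, lon) points given in degrees."""
    lat1, lon1, lat2, lon2 = map(np.radians, (lat1, lon1, lat2, lon2))
    a = (np.sin((lat2 - lat1) / 2) ** 2 +
         np.cos(lat1) * np.cos(lat2) * np.sin((lon2 - lon1) / 2) ** 2)
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    return radius_km * c * 1000

# Dividing the distance covered between two hourly records by 3600 gives velocity in m/s:
# haversine_m(67.0, -50.0, 67.0001, -50.0) / 3600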
Example #12
def nsidc2nc(args, input_file, output_file, stations):
    """Main function to convert NSIDC txt file to netCDF"""
    df, temperature_vars, pressure_vars = init_dataframe(args, input_file)
    ds = xr.Dataset.from_dataframe(df)
    ds = ds.drop('time')

    common.log(args, 2, 'Retrieving latitude, longitude and station name')
    latitude, longitude, station_name, elevation, qlty_ctrl = get_station(
        args, input_file, stations)

    common.log(args, 3, 'Calculating time and sza')
    time, time_bounds, sza, day_of_year, year1900 = get_time_and_sza(
        args, df, latitude, longitude)

    ds['day_of_year'] = 'time', day_of_year
    ds['time'] = 'time', time
    ds['time_bounds'] = ('time', 'nbnd'), time_bounds
    ds['sza'] = 'time', sza
    ds['station_name'] = tuple(), station_name
    ds['latitude'] = tuple(), latitude
    ds['longitude'] = tuple(), longitude
    ds['elevation'] = tuple(), elevation

    comp_level = args.dfl_lvl

    common.load_dataset_attributes('nsidc',
                                   ds,
                                   args,
                                   temperature_vars=temperature_vars,
                                   pressure_vars=pressure_vars,
                                   qlty_ctrl=qlty_ctrl,
                                   year1900=year1900)
    encoding = common.get_encoding('nsidc', common.get_fillvalue(args),
                                   comp_level, args)

    common.write_data(args, ds, output_file, encoding)
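
common.get_encoding and common.write_data are not shown in these examples; they presumably assemble a per-variable encoding dictionary and pass it to xarray's to_netcdf. A hedged sketch of that final step; the function name and the exact encoding keys chosen here are assumptions:

import numpy as np
import xarray as xr

def write_data_sketch(ds, output_file, fillvalue, comp_level):
    """Assumed shape of the final write step: fill value plus optional zlib compression."""
    encoding = {}
    for var in ds.data_vars:
        if np.issubdtype(ds[var].dtype, np.number):
            encoding[var] = {'_FillValue': fillvalue}
            if comp_level:  # deflate level, e.g. args.dfl_lvl
                encoding[var].update(zlib=True, complevel=comp_level)
    ds.to_netcdf(output_file, encoding=encoding)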