Example #1
0
def dest_path_aodn_wave_dm(filepath):
    """Return the destination path for an AODN delayed-mode wave file.

    The facility sub-directory is deduced from the file name (regex match)
    and the product sub-directory from the ``site_name`` global attribute
    read from the NetCDF file.

    :param filepath: path to a NetCDF wave file
    :return: relative destination path ending with the file basename
    :raises InvalidFileContentError: when the ``site_name`` global attribute
        is missing/invalid for the matched facility
    :raises InvalidFileNameError: when the file name matches no known pattern
    """
    file_basename = os.path.basename(filepath)
    with Dataset(filepath, mode='r') as nc_obj:
        site_name = nc_obj.site_name

    if BOM_WAVERIDER.match(file_basename):
        data_base_dir = os.path.join(BOM_DIR, WAVERIDER_DIR, DELAYED_DIR)
        product_dir = site_name.replace(' ', '_')

    elif DES_QLD_WAVERIDER.match(file_basename):
        data_base_dir = os.path.join(DES_QLD_DIR, WAVERIDER_DIR, DELAYED_DIR)
        fields = get_pattern_subgroups_from_string(file_basename,
                                                   DES_QLD_WAVERIDER)
        product_dir = fields['site_code']

    elif DOT_WA_WAVERIDER.match(file_basename):
        data_base_dir = os.path.join(DOT_WA_DIR, WAVERIDER_DIR, DELAYED_DIR)
        fields = get_pattern_subgroups_from_string(file_basename,
                                                   DOT_WA_WAVERIDER)
        product_dir = os.path.join(site_name.replace(' ', '_'),
                                   fields['site_code'])

    elif MHL_WAVERIDER.match(file_basename):
        data_base_dir = os.path.join(MHL_DIR_BASE, MHL_DIR, MHL_WAVERIDER_DIR)
        product_dir = site_name.replace(' ', '_')

    elif DOT_WA_AWAC.match(file_basename):
        data_base_dir = os.path.join(DOT_WA_DIR, AWAC_DIR, DELAYED_DIR)
        fields = get_pattern_subgroups_from_string(file_basename, DOT_WA_AWAC)
        product_dir = fields['site_code']

    elif DTA_NZ_WAVERIDER.match(file_basename):
        data_base_dir = os.path.join(DTA_NZ_DIR, WAVERIDER_DIR, DELAYED_DIR)
        if 'Wave Rider Buoy' not in site_name:
            # BUG FIX: the message previously lacked the {filename}
            # placeholder (so .format() was a no-op) and wrongly referred
            # to the site_code attribute while the code checks site_name.
            raise InvalidFileContentError(
                "file name: \"{filename}\"; global attribute site_name does not contain 'Wave Rider Buoy' string to "
                "deduce path".format(filename=file_basename))
        product_dir = site_name.replace('Wave Rider Buoy',
                                        '').strip().replace(' ', '_')

    elif NTP_WAVE.match(file_basename):
        data_base_dir = os.path.join(NTP_WAVE_DIR, WAVERIDER_DIR, DELAYED_DIR)
        if len(site_name) == 0:
            raise InvalidFileContentError(
                "file name: \"{filename}\"; global attribute site_name is empty"
                .format(filename=file_basename))
        product_dir = site_name

    else:
        raise InvalidFileNameError(
            "file name: \"{filename}\" not matching regex to deduce path".
            format(filename=file_basename))

    return os.path.join(data_base_dir, product_dir, os.path.basename(filepath))
Example #2
0
def get_creation_date(filepath):
    """Return the creation date parsed from a GSLA file name.

    :param filepath: path to a GSLA file (regular or yearly product)
    :return: ``datetime`` parsed from the ``creation_date`` file-name field
    :raises InvalidFileNameError: if the name matches neither GSLA pattern
    """
    file_basename = os.path.basename(filepath)
    if GSLA_REGEX.match(file_basename):
        fields = get_pattern_subgroups_from_string(file_basename, GSLA_REGEX)

    elif GSLA_REGEX_YEARLY.match(file_basename):
        fields = get_pattern_subgroups_from_string(file_basename,
                                                   GSLA_REGEX_YEARLY)

    else:
        # BUG FIX: restore the {filename} placeholder so .format() actually
        # interpolates the offending file name into the message.
        raise InvalidFileNameError(
            "file name: \"{filename}\" not matching regex to deduce creation_date"
            .format(filename=file_basename))

    return datetime.strptime(fields['creation_date'], '%Y%m%dT%H%M%SZ')
Example #3
0
    def test_get_fields_from_filename(self):
        """Check the sub-groups extracted from the SRS_GOOD file name."""
        fields = get_pattern_subgroups_from_string(SRS_GOOD,
                                                   IMOS_OC_FILE_PATTERN)

        expected = {
            'data_parameter_code': 'A',
            'time_coverage_resolution': 'P1D',
            'sat_pass': 'aust',
        }
        for field_name, expected_value in expected.items():
            self.assertEqual(fields[field_name], expected_value)
Example #4
0
    def test_get_fields(self):
        """Test basic function outputs."""
        filename = os.path.basename(GOOD_NC_GZ_DM00)
        fields = get_pattern_subgroups_from_string(filename, GSLA_REGEX)

        expected = {
            'nc_time_cov_start': '19930101T000000Z',
            'creation_date': '20130913T082343Z',
            'product_type': 'DM00',
        }
        for field_name, expected_value in expected.items():
            self.assertEqual(fields[field_name], expected_value)
Example #5
0
    def test_get_fields_from_filename(self):
        """Check the sub-groups extracted from a good ACORN FV00 file name."""
        basename = os.path.basename(GOOD_NC_FV00)
        fields = get_pattern_subgroups_from_string(basename,
                                                   ACORN_FILE_PATTERN)

        expected = {
            'data_parameter_code': 'V',
            'platform_code': 'ROT',
            'product_type': '1-hour-avg',
        }
        for field_name, expected_value in expected.items():
            self.assertEqual(fields[field_name], expected_value)
Example #6
0
    def dest_path(filepath):
        """Return the destination path for an OC gridded file.

        :param filepath: path to the NetCDF file
        :return: relative destination path ending with the file basename
        :raises InvalidFileNameError: if the file name matches no known
            pattern, or carries an unknown data parameter code or time
            coverage resolution
        """
        file_basename = os.path.basename(filepath)

        # map the single-letter data parameter code to a product name
        product_names = {'A': 'aqua', 'S': 'seawifs', 'V': 'viirs'}

        # NON CONTRIBUTED DATA SET
        if IMOS_OC_FILE_PATTERN.match(file_basename):
            fields = get_pattern_subgroups_from_string(file_basename, IMOS_OC_FILE_PATTERN)
            nc_time_cov_start = datetime.strptime(fields['nc_time_cov_start'], '%Y%m%dT%H%M%SZ')
            data_parameter_code = fields['data_parameter_code']

            # BUG FIX: an unexpected code previously left product_name
            # unbound and crashed with NameError; raise a clear error instead
            if data_parameter_code not in product_names:
                raise InvalidFileNameError(
                    "file name: \"{filename}\" has unknown data parameter code '{code}'".
                    format(filename=file_basename, code=data_parameter_code))
            product_name = product_names[data_parameter_code]

            return os.path.join(OC_GRIDDED_PREFIX_PATH, product_name, fields['time_coverage_resolution'],
                                '%d' % nc_time_cov_start.year, '%02d' % nc_time_cov_start.month,
                                file_basename)

        # CONTRIBUTED DATA SET
        elif RJOHNSON_FILE_PATTERN.match(file_basename):
            fields = get_pattern_subgroups_from_string(file_basename, RJOHNSON_FILE_PATTERN)
            data_parameter_code = fields['data_parameter_code']
            time_coverage_resolution = fields['time_coverage_resolution']

            if data_parameter_code not in ('A', 'S'):
                raise InvalidFileNameError(
                    "file name: \"{filename}\" has unknown data parameter code '{code}'".
                    format(filename=file_basename, code=data_parameter_code))
            product_name = product_names[data_parameter_code]

            # 8-day and monthly products map to distinct path components
            time_covs = {'8D': '8d', 'MO': '1m'}
            if time_coverage_resolution not in time_covs:
                raise InvalidFileNameError(
                    "file name: \"{filename}\" has unknown time coverage resolution '{res}'".
                    format(filename=file_basename, res=time_coverage_resolution))
            time_cov = time_covs[time_coverage_resolution]

            return os.path.join(OC_GRIDDED_PREFIX_PATH, 'contributed', 'SO-Johnson',
                                'chl', time_cov, product_name, file_basename)

        else:
            # BUG FIX: restore the {filename} placeholder so .format() works
            raise InvalidFileNameError("file name: \"{filename}\" not matching regex to deduce dest_path".
                                       format(filename=file_basename))
Example #7
0
    def dest_path(filepath):
        """Build the GHRSST destination path for *filepath*.

        L3P and L4 products are resolved purely from the file name; every
        other product needs information read from the NetCDF file itself
        via ``get_info_nc``.
        """
        file_basename = os.path.basename(filepath)

        if L3P_FILE_PATTERN.match(file_basename):
            fields = get_pattern_subgroups_from_string(file_basename,
                                                       L3P_FILE_PATTERN)
            start_year = datetime.strptime(fields['nc_time_cov_start'],
                                           '%Y%m%d').year
            return os.path.join(GHRSST_PREFIX_PATH, fields['product_type'],
                                '14d', str(start_year), file_basename)

        if L4_FILE_PATTERN.match(file_basename):
            fields = get_pattern_subgroups_from_string(file_basename,
                                                       L4_FILE_PATTERN)
            start_year = datetime.strptime(fields['nc_time_cov_start'],
                                           '%Y%m%d%H%M%S').year
            return os.path.join(GHRSST_PREFIX_PATH, fields['product_type'],
                                fields['product_name'], str(start_year),
                                file_basename)

        file_info = get_info_nc(filepath)

        # drop whichever of day_time / sat_value is absent from the path
        if file_info['sat_value'] is None:
            middle_parts = [file_info['day_time']]
        elif file_info['day_time'] is None:
            middle_parts = [file_info['sat_value']]
        else:
            middle_parts = [file_info['day_time'], file_info['sat_value']]

        return os.path.join(GHRSST_PREFIX_PATH, file_info['product_path'],
                            *middle_parts,
                            str(file_info['date_data'].year),
                            file_basename)
Example #8
0
def get_type(filepath):
    """Return acorn_file_type, the file type of an ACORN file based on its filename.

    :param filepath: path to an ACORN NetCDF file
    :return: one of the known ACORN file type strings
    :raises InvalidFileNameError: when the name does not match the ACORN
        pattern or the (product_type, file_version) pair is unknown
    """
    file_basename = os.path.basename(filepath)
    if not ACORN_FILE_PATTERN.match(file_basename):
        # BUG FIX: restore the {filename} placeholder so .format() actually
        # interpolates the offending file name into the message.
        raise InvalidFileNameError(
            "file name: \"{filename}\" Unknown product type from filename".
            format(filename=file_basename))

    fields = get_pattern_subgroups_from_string(file_basename,
                                               ACORN_FILE_PATTERN)
    product_type = fields['product_type']
    file_version = fields['file_version']
    platform_code = fields['platform_code']

    # straightforward (product_type, file_version) -> file type mapping
    known_types = {
        ('radial', 'FV00'): "radial",
        ('radial', 'FV01'): "radial_quality_controlled",
        ('sea-state', 'FV00'): "vector",
        ('wavespec', 'FV01'): "gridded_1h-avg-wave-spectra_QC",
        ('windp', 'FV01'): "gridded_1h-avg-wind-map_QC",
        ('1-hour-avg', 'FV00'): "gridded_1h-avg-current-map_non-QC",
        ('1-hour-avg', 'FV01'): "gridded_1h-avg-current-map_QC",
    }

    key = (product_type, file_version)
    if key in known_types:
        return known_types[key]

    if key == ('wavep', 'FV01'):
        # wave parameter product: a few stations are gridded as site maps,
        # everything else as station maps
        site_map_station = ('CBG', 'SAG', 'ROT', 'COF')
        if platform_code in site_map_station:
            return "gridded_1h-avg-wave-site-map_QC"
        return "gridded_1h-avg-wave-station-map_QC"

    raise InvalidFileNameError(
        "file name: \"{filename}\" Unknown product type from filename".
        format(filename=file_basename))
Example #9
0
def get_gsla_type(filepath):
    """Return the GSLA file type deduced from the file name.

    :param filepath: path to a GSLA file
    :return: the product type string (regular files), or the product type
        joined with ``'yearfiles'`` for yearly products
    :raises InvalidFileNameError: if the name matches neither GSLA pattern
    """
    file_basename = os.path.basename(filepath)
    if GSLA_REGEX.match(file_basename):
        fields = get_pattern_subgroups_from_string(file_basename, GSLA_REGEX)
        return fields['product_type']

    elif GSLA_REGEX_YEARLY.match(file_basename):
        return os.path.join(get_product_type(filepath), 'yearfiles')

    else:
        # BUG FIX: restore the {filename} placeholder so .format() actually
        # interpolates the offending file name into the message.
        raise InvalidFileNameError(
            "file name: \"{filename}\" not matching regex to deduce dest_path".
            format(filename=file_basename))
Example #10
0
    def dest_path(filepath):
        """Return the archive path of an ACORN file, organised by file type,
        platform code and coverage start date (year/month/day)."""
        file_basename = os.path.basename(filepath)
        acorn_type = get_type(filepath)

        fields = get_pattern_subgroups_from_string(file_basename,
                                                   ACORN_FILE_PATTERN)
        cov_start = datetime.strptime(fields['nc_time_cov_start'],
                                      '%Y%m%dT%H%M%SZ')

        return os.path.join('IMOS', 'ACORN', acorn_type,
                            fields['platform_code'],
                            '%d' % cov_start.year,
                            '%02d' % cov_start.month,
                            '%02d' % cov_start.day, file_basename)
Example #11
0
    def dest_path(filepath):
        """NetCDF only as an input. Not nc.gz."""
        file_basename = os.path.basename(filepath)
        gsla_type = get_gsla_type(filepath)

        if GSLA_REGEX_YEARLY.match(file_basename):
            # yearly products are not partitioned by year sub-directory
            dest = os.path.join(GSLA_PREFIX_PATH, gsla_type, file_basename)
        else:
            fields = get_pattern_subgroups_from_string(file_basename,
                                                       GSLA_REGEX)
            cov_start_year = datetime.strptime(fields['nc_time_cov_start'],
                                               '%Y%m%dT%H%M%SZ').year
            dest = os.path.join(GSLA_PREFIX_PATH, gsla_type,
                                str(cov_start_year), file_basename)

        # destination path should always be for nc.gz files. So we force it
        if dest.endswith('.nc'):
            dest = '{val}.gz'.format(val=dest)
        return dest
Example #12
0
def get_info_nc(filepath):
    """Collect path-building information from a GHRSST file name.

    :param filepath: path to a GHRSST NetCDF file
    :return: dict with keys ``prod_level``, ``temporal_extent``,
        ``day_time``, ``date_data``, ``sat_value`` and ``product_path``
    :raises InvalidFileNameError: if the name matches no known GHRSST pattern
    """
    file_basename = os.path.basename(filepath)

    if L3S_L3C_FILE_PATTERN.match(file_basename):
        fields = get_pattern_subgroups_from_string(file_basename,
                                                   L3S_L3C_FILE_PATTERN)
        day_time = fields['day_time']
        temporal_extent = fields['temporal_extent']
    elif L3U_FILE_PATTERN.match(file_basename):
        fields = get_pattern_subgroups_from_string(file_basename,
                                                   L3U_FILE_PATTERN)
        day_time = None
        temporal_extent = None
    elif L3S_MULTISENSOR_FILE_PATTERN.match(file_basename):
        fields = get_pattern_subgroups_from_string(
            file_basename, L3S_MULTISENSOR_FILE_PATTERN)
        day_time = fields['day_time']
        temporal_extent = fields['temporal_extent']
        # multi-sensor products get an 'M' suffix on the product type
        fields['product_type'] = '%sM' % fields['product_type']
    elif L3U_VIIRS_FILE_PATTERN.match(file_basename):
        fields = get_pattern_subgroups_from_string(file_basename,
                                                   L3U_VIIRS_FILE_PATTERN)
        # NOTE(review): '' (not None) here — unlike the plain L3U branch —
        # keeps the day_time path component present but empty; confirm
        # this asymmetry is intentional before unifying the two branches
        day_time = ''
        temporal_extent = None
        fields['sat_value'] = 'snpp'
    elif L3C_VIIRS_FILE_PATTERN.match(file_basename):
        fields = get_pattern_subgroups_from_string(file_basename,
                                                   L3C_VIIRS_FILE_PATTERN)
        day_time = fields['day_time']
        temporal_extent = fields['temporal_extent']
        fields['sat_value'] = 'snpp'
    else:
        # BUG FIX: restore the {filename} placeholder so .format() actually
        # interpolates the offending file name into the message.
        raise InvalidFileNameError(
            "file name: \"{filename}\" not matching regex to deduce dest_path".
            format(filename=os.path.basename(filepath)))

    prod_lev = fields['product_type']

    # abbreviate the night-time flag used in the path
    if day_time == 'night':
        day_time = 'ngt'

    date_nc = datetime.strptime(fields['nc_time_cov_start'], '%Y%m%d%H%M%S')

    # numeric satellite values are NOAA satellite numbers -> 'n<NN>'
    sat_value = fields.get('sat_value', '')
    if sat_value.isdigit():
        sat_value = 'n%s' % sat_value

    if prod_lev != 'L3U':
        product_path = '%s-%s' % (prod_lev, temporal_extent)
    else:
        product_path = prod_lev

    # Southern-hemisphere products get an 'S' suffix on the product path
    if 'Southern' in filepath:
        if '-' in product_path:
            product_path = '%sS' % product_path
        else:
            product_path = '%s-%s' % (product_path, 'S')

    file_info = {
        'prod_level': prod_lev,
        'temporal_extent': temporal_extent,
        'day_time': day_time,
        'date_data': date_nc,
        'sat_value': sat_value,
        'product_path': product_path
    }

    return file_info
Example #13
0
def netcdf_writer(log_path, output_dir, ship_name, meta_path=None):
    """Convert a SOOP-TMV NRT log file into a NetCDF file.

    :param log_path: path to the raw log file to parse
    :param output_dir: directory the NetCDF file is written to
    :param ship_name: vessel name stored as a global attribute
    :param meta_path: optional path to a JSON metadata file holding
        per-channel calibration coefficients (default: no calibration)
    :return: path of the created NetCDF file
    :raises InvalidFileNameError: if the log file's product code is unknown
    :raises ValueError: if the measurement frequency is neither 1 nor 10 s

    .. note:: BUG FIX — the previous signature used the mutable default
       ``meta_path=[]`` as a sentinel; ``None`` is the safe equivalent and
       remains backward-compatible for all callers.
    """
    # map metadata calibration item codes to the NetCDF variable they apply to
    calibrations = {}
    if meta_path:
        item_to_var = {'EFLO': 'CPHL', 'ESAL': 'PSAL',
                       'ETMP': 'TEMP', 'ETURB': 'TURB'}
        with open(meta_path, 'r') as f:
            # drop '#' comment lines before parsing the JSON document
            meta_data = json.loads('\n'.join(
                row for row in f.readlines() if len(row.split('#')) == 1))
        for cal in meta_data['calibration']:
            var_name = item_to_var.get(cal['item'])
            if var_name is not None:
                calibrations[var_name] = (float(cal['a0']), float(cal['a1']))

    df = parse_log_file(log_path)
    df = transform_count_to_real_val(df)
    log_filename = os.path.basename(log_path)

    fields = get_pattern_subgroups_from_string(log_filename,
                                               SOOP_NRT_LOG_PATTERN)
    product_code = fields['product_code']

    if product_code in ['D2M', 'M2D', 'S2M', 'M2S']:
        product_type = "transect"
        feature_type = "trajectory"
        template = DatasetTemplate.from_json(NC_JSON_TEMPLATE_TRAJECTORY)
    elif product_code in ['DEV', 'MEL', 'SYD']:
        product_type = "mooring"
        feature_type = "timeSeries"
        template = DatasetTemplate.from_json(NC_JSON_TEMPLATE_MOORING)
    else:
        raise InvalidFileNameError(
            "SOOP NRT input logfile has incorrect product_code '{product_code}'. Not belonging to any of "
            "('D2M', 'M2D', 'S2M', 'M2S','DEV', 'MEL', 'SYD').".format(
                product_code=product_code))

    template.global_attributes.update({'product_type': product_type})

    time_val_dateobj = date2num(df.index.to_pydatetime(),
                                template.variables['TIME']['units'],
                                template.variables['TIME']['calendar'])

    # replace all nan with FillValue from template value
    df.replace(np.nan,
               template.variables['LATITUDE']['_FillValue'],
               inplace=True)

    template.variables['TIME']['_data'] = time_val_dateobj
    template.variables['LATITUDE']['_data'] = df.LATITUDE.values
    template.variables['LONGITUDE']['_data'] = df.LONGITUDE.values

    template.variables['TEMP']['_data'] = df.TEMP.values
    template.variables['PSAL']['_data'] = df.PSAL.values
    template.variables['TURB']['_data'] = df.TURB.values
    template.variables['CPHL']['_data'] = df.CPHL.values

    # attach calibration coefficients read from the metadata file (if any);
    # replaces the previous fragile 'name in locals()' bookkeeping
    calibration_comment = 'Value=a0 + a1 x raw_value'
    for var_name, (a0, a1) in calibrations.items():
        template.variables[var_name]['a0'] = a0
        template.variables[var_name]['a1'] = a1
        template.variables[var_name]['calibration_comment'] = calibration_comment

    measurement_frequency = get_measurement_frequency(df)
    if measurement_frequency == 1:
        measurement_frequency_str = '1sec'
    elif measurement_frequency == 10:
        measurement_frequency_str = '10secs'
    else:
        # BUG FIX: previously an unexpected frequency left
        # measurement_frequency_str unbound and crashed with NameError
        raise ValueError(
            'Unexpected measurement frequency {freq}; expected 1 or 10 seconds'.format(
                freq=measurement_frequency))

    template.global_attributes.update({
        'time_coverage_start':
        df.index.strftime('%Y-%m-%dT%H:%M:%SZ')[0],
        'time_coverage_end':
        df.index.strftime('%Y-%m-%dT%H:%M:%SZ')[-1],
        'featureType':
        feature_type,
        'date_created':
        datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
        'platform_code':
        SHIP_CODE,
        'vessel_name':
        ship_name,
        'geospatial_lat_min':
        df.LATITUDE.dropna().min(),
        'geospatial_lat_max':
        df.LATITUDE.dropna().max(),
        'geospatial_lon_min':
        df.LONGITUDE.dropna().min(),
        'geospatial_lon_max':
        df.LONGITUDE.dropna().max(),
        'measurement_frequency':
        measurement_frequency_str,
        'history':
        "File created {date_created}".format(
            date_created=datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"))
    })

    # 1-second products additionally carry the sensor blank/scale parameters
    if measurement_frequency == 1:
        template.variables['CPHL'].update({
            'calibration_blank':
            CHLU_PARAMS['blank'],
            'calibration_scale':
            CHLU_PARAMS['scale']
        })

        template.variables['TURB'].update({
            'calibration_blank':
            TURB_PARAMS['blank'],
            'calibration_scale':
            TURB_PARAMS['scale']
        })

    nc_filename = 'IMOS_SOOP-TMV_TSUB_{time_start}_{vessel_code}_FV0{product_number}_{product_type}-{product_code}_END-{time_end}.nc'.format(
        time_start=df.index.strftime('%Y%m%dT%H%M%SZ')[0],
        time_end=df.index.strftime('%Y%m%dT%H%M%SZ')[-1],
        vessel_code=SHIP_CODE,
        product_number=0,
        product_type=product_type,
        product_code=product_code)

    netcdf_path = os.path.join(output_dir, nc_filename)
    template.to_netcdf(netcdf_path)
    return netcdf_path