Пример #1
0
def test_save_all_eofs_to_dir(tmp_path):
    """
    Round-trip test: save a full EOF series into a directory and load it again.

    Saving into a non-existing directory with ``create_dir=False`` is expected to raise
    :class:`FileNotFoundError`. Afterwards the series is saved to ``tmp_path / "eofs"``,
    reloaded and compared with the data that was written.

    :param tmp_path: Base directory for the files written by this test.
    """
    lat = np.array([-10., 0., 10.])
    long = np.array([0., 5.])
    # One synthetic EOF pair per DOY; scaling with the DOY makes every day distinguishable.
    eofs = [
        eof.EOFData(lat,
                    long,
                    np.array([1, 2, 3, 4, 5, 6]) * doy,
                    np.array([10, 20, 30, 40, 50, 60]) * doy)
        for doy in range(1, 367)
    ]
    target = eof.EOFDataForAllDOYs(eofs)

    print(tmp_path)

    errors = []

    # Saving must fail if the directory is missing and must not be created.
    missing_dir = tmp_path / "eofs_dir_not_exisiting"
    with pytest.raises(FileNotFoundError) as e:
        target.save_all_eofs_to_dir(missing_dir, create_dir=False)
    if "No such file or directory" not in str(e.value):
        errors.append("Test target should raise error, because directory does not exist.")

    # Default call, followed by a reload from the same location.
    eofs_dir = tmp_path / "eofs"
    target.save_all_eofs_to_dir(eofs_dir)
    target_reloaded = eof.load_all_eofs_from_directory(eofs_dir)

    checks = (
        (target_reloaded.eof_list == eofs, "List of EOFData objects incorrect"),
        (np.all(target_reloaded.lat == lat), "Lat is incorrect"),
        (np.all(target_reloaded.long == long), "Long is incorrect"),
        (target_reloaded.eofdata_for_doy(1) == eofs[0], "Sample EOF data is incorrect"),
    )
    for passed, message in checks:
        if not passed:
            errors.append(message)

    assert not errors, "errors occurred:\n{}".format("\n".join(errors))
Пример #2
0
def test_EOFDataForAllDOYs_doy_getfunctions():
    """
    Checks the per-DOY accessors of :class:`eof.EOFDataForAllDOYs`.

    A synthetic EOF series is built in which the EOF values scale with the DOY. The accessors
    ``eofdata_for_doy``, ``eof1vector_for_doy`` and ``eof2vector_for_doy`` are then sampled
    for DOY 1 and DOY 12. All failed checks are collected and reported together.
    """
    lat = np.array([-10., 0., 10.])
    long = np.array([0., 5.])
    eofs = []
    for doy in range(1, 367):
        eof1 = np.array([1, 2, 3, 4, 5, 6]) * doy
        eof2 = np.array([10, 20, 30, 40, 50, 60]) * doy
        eofs.append(eof.EOFData(lat, long, eof1, eof2))
    target = eof.EOFDataForAllDOYs(eofs)

    errors = []
    # Expected values follow from the construction above: element k of EOF1 is (k + 1) * doy,
    # element k of EOF2 is 10 * (k + 1) * doy.
    if not target.eofdata_for_doy(1).eof1vector[0] == 1:
        errors.append("EofData for DOY 1 incorrect")
    if not target.eofdata_for_doy(12).eof1vector[1] == 24:
        errors.append("EofData for DOY 12 incorrect")

    if not target.eof1vector_for_doy(1)[0] == 1:
        errors.append("EOF1Vector for DOY 1 incorrect")
    if not target.eof1vector_for_doy(12)[1] == 24:
        errors.append("EOF1Vector for DOY 12 incorrect")

    if not target.eof2vector_for_doy(1)[0] == 10:
        errors.append("EOF2Vector for DOY 1 incorrect")
    if not target.eof2vector_for_doy(12)[1] == 240:
        errors.append("EOF2Vector for DOY 12 incorrect")

    assert not errors, "errors occurred:\n{}".format("\n".join(errors))
Пример #3
0
def test_EOFDataForAllDOYs_initialization_exceptions():
    """
    Checks that :class:`eof.EOFDataForAllDOYs` rejects inconsistent input with a ValueError.

    Three cases are covered: a series with only 365 entries, a series in which one entry has
    a deviating latitude grid, and a series in which one entry has a deviating longitude grid.
    Besides the exception type, a fragment of the exception message is checked in each case.
    All failed checks are collected and reported together.
    """
    lat = np.array([-10., 0., 10.])
    long = np.array([0., 5.])

    errors = []

    # one DOY missing
    eofs = []
    for doy in range(1, 366):
        eof1 = np.array([1, 2, 3, 4, 5, 6]) * doy
        eof2 = np.array([10, 20, 30, 40, 50, 60]) * doy
        eofs.append(eof.EOFData(lat, long, eof1, eof2))
    with pytest.raises(ValueError) as e:
        eof.EOFDataForAllDOYs(eofs)
    if "contain 366" not in str(e.value):
        errors.append("Check for 366 DOYs failed.")

    # wrong latitude
    # The 201st entry is corrupted, while the expected message names "DOY 200" -- presumably
    # the message uses the zero-based list index; confirm against the EOFDataForAllDOYs code.
    eofs = []
    for idx in range(1, 367):
        eof1 = np.array([1, 2, 3, 4, 5, 6]) * idx
        eof2 = np.array([10, 20, 30, 40, 50, 60]) * idx
        corrupt = 1.
        if idx == 201:
            corrupt = 3.
        eofs.append(eof.EOFData(corrupt * lat, long, eof1, eof2))
    with pytest.raises(ValueError) as e:
        eof.EOFDataForAllDOYs(eofs)
    if "DOY 200" not in str(e.value):
        errors.append("Check for same latitudes failed.")

    # wrong longitude
    # Same pattern as above: the 101st entry is corrupted, "DOY 100" is expected in the message.
    eofs = []
    for idx in range(1, 367):
        eof1 = np.array([1, 2, 3, 4, 5, 6]) * idx
        eof2 = np.array([10, 20, 30, 40, 50, 60]) * idx
        corrupt = 1.
        if idx == 101:
            corrupt = 2.
        eofs.append(eof.EOFData(lat, corrupt * long, eof1, eof2))
    with pytest.raises(ValueError) as e:
        eof.EOFDataForAllDOYs(eofs)
    if "DOY 100" not in str(e.value):
        errors.append("Check for same longitudes failed.")

    assert not errors, "errors occurred:\n{}".format("\n".join(errors))
Пример #4
0
def correct_spontaneous_sign_changes_in_eof_series(
        eofs: eof.EOFDataForAllDOYs,
        doy1reference: bool = False) -> eof.EOFDataForAllDOYs:
    """
    Switches the signs of all pairs of EOFs (for all DOYs) if necessary, so that the signs are consistent for all DOYs.

    Note that the sign of the EOFs is not uniquely defined by the PCA. Hence, the sign may jump from one DOY to another,
    which can be improved using this function. As long as this step is performed before computing the PCs, it will not
    change the overall result.

    Generally, the sign of the EOFs for a specific DOY is changed if it differs from the sign of the EOF for the previous
    DOY. The EOFs for DOY 1 are left unchanged by default. If ``doy1reference`` is set, they are instead aligned with
    the original calculation by Kiladis (2014), resulting in an EOF series that is comparable to the original
    Kiladis (2014) calculation.

    :param eofs: The EOF series for which the signs should be aligned.
    :param doy1reference: If true, the EOFs of DOY 1 are aligned w.r.t. the original Kiladis (2014) calculation.
        Any truthy value (e.g., also a ``numpy.bool_``) enables the alignment. Default is ``False``.

    :return: The EOFs with aligned signs.
    """
    if doy1reference:
        # The reference EOFs are loaded from the "sign_reference" directory next to this source file.
        reference_path = Path(
            os.path.dirname(
                os.path.abspath(inspect.getfile(
                    inspect.currentframe())))) / "sign_reference"
        reference_eofs = eof.load_original_eofs_for_doy(reference_path, 1)
        # Sizes are compared first, so that the element-wise comparisons are only evaluated for grids of equal length.
        same_grid = reference_eofs.lat.size == eofs.lat.size \
            and reference_eofs.long.size == eofs.long.size \
            and np.all(reference_eofs.lat == eofs.lat) \
            and np.all(reference_eofs.long == eofs.long)
        if not same_grid:
            warnings.warn(
                "References for the sign of the EOFs for DOY1 have to be interpolated to spatial grid of the"
                " target EOFs. Treat results with caution.")
            # NOTE(review): scipy.interpolate.interp2d is deprecated in newer SciPy releases -- confirm that the
            # supported SciPy versions still provide it before upgrading.
            f1 = scipy.interpolate.interp2d(reference_eofs.long,
                                            reference_eofs.lat,
                                            reference_eofs.eof1map,
                                            kind='linear')
            eof1map_interpol = f1(eofs.long, eofs.lat)
            f2 = scipy.interpolate.interp2d(reference_eofs.long,
                                            reference_eofs.lat,
                                            reference_eofs.eof2map,
                                            kind='linear')
            eof2map_interpol = f2(eofs.long, eofs.lat)
            reference_eofs = eof.EOFData(eofs.lat, eofs.long, eof1map_interpol,
                                         eof2map_interpol)
        corrected_doy1 = _correct_spontaneous_sign_change_of_individual_eof(
            reference_eofs, eofs.eofdata_for_doy(1))
    else:
        corrected_doy1 = eofs.eofdata_for_doy(1)

    # Propagate the sign through the series: each DOY is aligned with the already corrected previous DOY.
    switched_eofs = [corrected_doy1]
    previous_eof = corrected_doy1
    for doy in tools.doy_list()[1:]:
        corrected_eof = _correct_spontaneous_sign_change_of_individual_eof(
            previous_eof, eofs.eofdata_for_doy(doy))
        switched_eofs.append(corrected_eof)
        previous_eof = corrected_eof
    return eof.EOFDataForAllDOYs(switched_eofs)
Пример #5
0
def interpolate_eofs_between_doys(eofs: eof.EOFDataForAllDOYs,
                                  start_doy: int = 293,
                                  end_doy: int = 316) -> eof.EOFDataForAllDOYs:
    """
    Replaces the EOF1 and EOF2 functions between 2 DOYs by a linear interpolation between these 2 DOYs.

    This should only rarely be used and has only been implemented to closely reproduce the original OMI values. There,
    the EOFs have also been replaced by an interpolation according to Kiladis (2014). However, the period stated in
    Kiladis (2014) from 1 November to 8 November is too short. The authors have confirmed that the right
    interpolation period is from DOY 294 to DOY 315, which is used here as default value.

    The EOFs of ``start_doy`` and ``end_doy`` themselves are kept and serve as the supporting points of the
    interpolation; all DOYs from ``start_doy + 1`` to ``end_doy - 1`` (both inclusive) are replaced.

    ATTENTION: The corresponding statistical values (e.g., the explained variances) are not changed by this routine.
    So these values further on represent the original results of the PCA also for the interpolated EOFs.

    :param eofs: The complete EOF series, in which the interpolation takes place.
    :param start_doy: The DOY, which is used as the first point of the interpolation (i.e., start_doy + 1 is the first
        element, which will be replaced by the interpolation).
    :param end_doy: The DOY, which is used as the last point of the interpolation (i.e., end_doy - 1 is the last
        element, which will be replaced by the interpolation).

    :return: The complete EOF series with the interpolated values.
    """
    doys = tools.doy_list()
    # DOYs are counted from 1, array rows from 0.
    start_idx = start_doy - 1
    end_idx = end_doy - 1
    eof_len = eofs.lat.size * eofs.long.size
    eofs1 = np.empty((doys.size, eof_len))
    eofs2 = np.empty((doys.size, eof_len))
    # Todo: Maybe this could be solved more efficiently
    # by using internal numpy functions for multidimensional operations
    for (idx, doy) in enumerate(doys):
        eofs1[idx, :] = eofs.eof1vector_for_doy(doy)
        eofs2[idx, :] = eofs.eof2vector_for_doy(doy)

    # The slice end is exclusive: stopping at end_idx makes row end_idx - 1 (i.e., DOY end_doy - 1) the last replaced
    # element, as documented. Stopping at end_idx - 1 would leave DOY end_doy - 1 un-interpolated.
    replaced = slice(start_idx + 1, end_idx)
    replaced_doys = doys[replaced]
    support_doys = [doys[start_idx], doys[end_idx]]
    for i in range(eof_len):
        eofs1[replaced, i] = np.interp(replaced_doys, support_doys,
                                       [eofs1[start_idx, i], eofs1[end_idx, i]])
        eofs2[replaced, i] = np.interp(replaced_doys, support_doys,
                                       [eofs2[start_idx, i], eofs2[end_idx, i]])

    # Rebuild the series; grids and statistical values are taken over unchanged from the original EOFs.
    interpolated_eofs = []
    for (idx, doy) in enumerate(doys):
        orig_eof = eofs.eofdata_for_doy(doy)
        interpolated_eofs.append(
            eof.EOFData(orig_eof.lat,
                        orig_eof.long,
                        np.squeeze(eofs1[idx, :]),
                        np.squeeze(eofs2[idx, :]),
                        explained_variances=orig_eof.explained_variances,
                        eigenvalues=orig_eof.eigenvalues,
                        no_observations=orig_eof.no_observations))
    return eof.EOFDataForAllDOYs(interpolated_eofs)
Пример #6
0
def test_if_refdata_isfound_for_correct_spontaneous_sign_changes_in_eof_series(
):
    """
    Checks that the sign correction with DOY 1 reference can be executed without an OSError.

    The function under test loads reference data when called with ``doy1reference=True``. An
    :class:`OSError` is interpreted as a sign that this data was not found, which is reported
    as a test failure together with the text of the caught exception.
    """
    lat = np.array([-10., 0., 10.])
    long = np.array([0., 5.])
    eof_list = []
    for doy in range(1, 367):
        eof1 = np.array([1, 2, 3, 4, 5, 6]) * doy
        eof2 = np.array([10, 20, 30, 40, 50, 60]) * doy
        eof_list.append(eof.EOFData(lat, long, eof1, eof2))
    eofs = eof.EOFDataForAllDOYs(eof_list)

    try:
        # Only the absence of an OSError matters here; the result itself is not inspected.
        omi.correct_spontaneous_sign_changes_in_eof_series(eofs, True)
    except OSError as e:
        pytest.fail(
            "Function failed with OS Error, hence the reference data has probably not been found, which points "
            "to an installation problem of the package: {}".format(e))
Пример #7
0
def test_EOFDataForAllDOYs_alldoy_getfunctions():
    """
    Checks the accessors of :class:`eof.EOFDataForAllDOYs` that return one value per DOY.

    Each DOY gets six synthetic explained variances and eigenvalues plus a number of
    observations. The "for_all_doys" accessors are then compared with the rows of the input
    arrays. All failed checks are collected and reported together.
    """
    doys = tools.doy_list()
    lat = np.array([-10., 0., 10.])
    long = np.array([0., 5.])

    # Six rows (one per EOF order), one column per DOY; each row is a running counter plus an offset.
    counter = np.arange(1, doys.size + 1, 1)
    explained_variances = np.array([counter + offset for offset in (111, 222, 333, 444, 555, 666)])
    eigenvalues = np.array([counter + offset for offset in (1111, 2222, 3333, 4444, 5555, 6666)])
    no_obs = doys * 5

    eofs = []
    for doy in doys:
        column = doy - 1
        eofs.append(
            eof.EOFData(lat,
                        long,
                        np.array([1, 2, 3, 4, 5, 6]) * doy,
                        np.array([10, 20, 30, 40, 50, 60]) * doy,
                        explained_variances=np.squeeze(explained_variances[:, column]),
                        eigenvalues=np.squeeze(eigenvalues[:, column]),
                        no_observations=no_obs[column]))
    target = eof.EOFDataForAllDOYs(eofs)

    checks = (
        (np.all(target.explained_variance1_for_all_doys() == explained_variances[0, :]),
         "Explained variance 1 incorrect"),
        (np.all(target.explained_variance2_for_all_doys() == explained_variances[1, :]),
         "Explained variance 2 incorrect"),
        (np.all(target.eigenvalue1_for_all_doys() == eigenvalues[0, :]),
         "Eigenvalue 1 incorrect"),
        (np.all(target.eigenvalue2_for_all_doys() == eigenvalues[1, :]),
         "Eigenvalue 2 incorrect"),
        (np.all(target.total_explained_variance_for_all_doys() == np.sum(explained_variances, axis=0)),
         "Total explained variance incorrect"),
        (np.all(target.no_observations_for_all_doys() == no_obs),
         "number of observations incorrect"),
    )
    errors = [message for passed, message in checks if not passed]

    assert not errors, "errors occurred:\n{}".format("\n".join(errors))
Пример #8
0
def test_EOFDataForAllDOYs_basic_properties():
    """
    Checks the basic properties of :class:`eof.EOFDataForAllDOYs` after construction.

    Compares ``eof_list``, ``lat``, ``long`` and the entry returned by ``eofdata_for_doy(1)``
    with the data the object was built from. All failed checks are collected and reported
    together.
    """
    lat = np.array([-10., 0., 10.])
    long = np.array([0., 5.])
    # One synthetic EOF pair per DOY, scaled with the DOY.
    eofs = [
        eof.EOFData(lat,
                    long,
                    np.array([1, 2, 3, 4, 5, 6]) * doy,
                    np.array([10, 20, 30, 40, 50, 60]) * doy)
        for doy in range(1, 367)
    ]
    target = eof.EOFDataForAllDOYs(eofs)

    checks = (
        (target.eof_list == eofs, "List of EOFData objects incorrect"),
        (np.all(target.lat == lat), "Lat is incorrect"),
        (np.all(target.long == long), "Long is incorrect"),
        (target.eofdata_for_doy(1) == eofs[0], "Sample EOF data is incorrect"),
    )
    errors = [message for passed, message in checks if not passed]

    assert not errors, "errors occurred:\n{}".format("\n".join(errors))
Пример #9
0
def test_save_all_eofs_to_npzfile(tmp_path):
    """
    Round-trip test: save a full EOF series into a single npz file and restore it.

    The series carries synthetic explained variances, eigenvalues and numbers of observations.
    After restoring, the EOF list, the grids and a sample entry are compared with the data
    that was written. All failed checks are collected and reported together.

    :param tmp_path: Base directory in which the file ``test.npz`` is written.
    """
    filename = tmp_path / "test.npz"
    doys = tools.doy_list()
    lat = np.array([-10., 0., 10.])
    long = np.array([0., 5.])

    # Six rows (one per EOF order), one column per DOY; each row is a running counter plus an offset.
    counter = np.arange(1, doys.size + 1, 1)
    explained_variances = np.array([counter + offset for offset in (111, 222, 333, 444, 555, 666)])
    eigenvalues = np.array([counter + offset for offset in (1111, 2222, 3333, 4444, 5555, 6666)])
    no_obs = doys * 5

    eofs = []
    for doy in doys:
        column = doy - 1
        eofs.append(
            eof.EOFData(lat,
                        long,
                        np.array([1, 2, 3, 4, 5, 6]) * doy,
                        np.array([10, 20, 30, 40, 50, 60]) * doy,
                        explained_variances=np.squeeze(explained_variances[:, column]),
                        eigenvalues=np.squeeze(eigenvalues[:, column]),
                        no_observations=no_obs[column]))
    target = eof.EOFDataForAllDOYs(eofs)
    target.save_all_eofs_to_npzfile(filename)

    target_reloaded = eof.restore_all_eofs_from_npzfile(filename)
    checks = (
        (target_reloaded.eof_list == eofs, "List of EOFData objects incorrect"),
        (np.all(target_reloaded.lat == lat), "Lat is incorrect"),
        (np.all(target_reloaded.long == long), "Long is incorrect"),
        (target_reloaded.eofdata_for_doy(1) == eofs[0], "Sample EOF data is incorrect"),
    )
    errors = [message for passed, message in checks if not passed]

    assert not errors, "errors occurred:\n{}".format("\n".join(errors))
Пример #10
0
def calc_eofs_from_preprocessed_olr(
        olrdata: olr.OLRData,
        implementation: str = "internal",
        strict_leap_year_treatment: bool = False) -> eof.EOFDataForAllDOYs:
    """
    Computes one pair of EOFs per DOY from OLR data that has already been preprocessed.

    It is recommended to use :meth:`calc_eofs_from_olr` instead, which covers the complete algorithm
    including the preprocessing.

    :param olrdata: The preprocessed OLR data, from which the EOFs are calculated.
    :param implementation: ``"eofs_package"`` selects the implementation based on the external package
        :py:mod:`eofs`. Any other value (including the default ``"internal"``) selects the internal implementation
        of the EOF approach.
    :param strict_leap_year_treatment: See description in :meth:`mjoindices.tools.find_doy_ranges_in_dates`.

    :return: A pair of EOFs for each DOY. This series of EOFs has probably still to be postprocessed.

    :raises ValueError: If ``"eofs_package"`` is selected, but the package is not available.
    """
    use_eofs_package = implementation == "eofs_package"
    if use_eofs_package and not eofs_package_available:
        raise ValueError(
            "Selected calculation with external eofs package, but package not available. Use "
            "internal implementation or install eofs package")

    # The implementation is selected once, since the choice does not depend on the DOY.
    calc_single_doy = calc_eofs_for_doy_using_eofs_package if use_eofs_package else calc_eofs_for_doy

    eofs = []
    for doy in tools.doy_list():
        print("Calculating EOFs for DOY %i" % doy)
        eofs.append(
            calc_single_doy(olrdata,
                            doy,
                            strict_leap_year_treatment=strict_leap_year_treatment))
    return eof.EOFDataForAllDOYs(eofs)
Пример #11
0
def test_calc_comparison_stats_for_eofs_all_doys():
    """
    Checks :func:`evalt.calc_comparison_stats_for_eofs_all_doys` with two nearly identical EOF series.

    The data series equals the reference series except for two sign flips: EOF1 is negated for
    DOY 3 and EOF2 is negated for DOY 4. For the EOF under consideration, the statistics are
    expected to indicate perfect agreement for all DOYs except the flipped one. All failed
    checks are collected and reported together.
    """
    doys = tools.doy_list()
    lat = np.array([-10., 0., 10.])
    long = np.array([0., 5.])

    errors = []

    eofs_reference = []
    eofs_data = []
    for doy in doys:
        ref_eof1 = np.array([1, 2, 3, 4, 5, 6]) * doy
        ref_eof2 = np.array([10, 20, 30, 40, 50, 60]) * doy
        eofs_reference.append(eof.EOFData(lat, long, ref_eof1, ref_eof2))
        # Introduce the deliberate sign flips into the data series only.
        data_eof1 = -1 * ref_eof1 if doy == 3 else ref_eof1
        data_eof2 = -1 * ref_eof2 if doy == 4 else ref_eof2
        eofs_data.append(eof.EOFData(lat, long, data_eof1, data_eof2))
    reference = eof.EOFDataForAllDOYs(eofs_reference)
    data = eof.EOFDataForAllDOYs(eofs_data)

    def check_stats(eof_number, flipped_doy):
        # Runs the comparison for one EOF and records all deviations from the expected pattern.
        pos = flipped_doy - 1
        prefix = "EOF{}: ".format(eof_number)
        doy_suffix = " for DOY {} wrong".format(flipped_doy)

        corr, diff_mean, diff_std, diff_abs_percent68, diff_abs_percent95, diff_abs_percent99 = \
            evalt.calc_comparison_stats_for_eofs_all_doys(reference, data, eof_number, exclude_doy366=False,
                                                          percentage=False, do_print=False)

        def others_close(values, expected):
            # True if all entries except the one of the flipped DOY are close to the expected value.
            return np.allclose(values[:pos], expected) and np.allclose(values[pos + 1:], expected)

        if not others_close(corr, 1):
            errors.append(prefix + "Correlations wrong")
        if not np.isclose(corr[pos], -1.):
            errors.append(prefix + "Correlation" + doy_suffix)
        if not others_close(diff_mean, 0):
            errors.append(prefix + "Mean wrong")
        if not diff_mean[pos] < 0:
            errors.append(prefix + "Mean" + doy_suffix)
        # The remaining statistics share one pattern: zero everywhere, positive for the flipped DOY.
        spread_stats = (("StdDev", diff_std),
                        ("68% Percentile", diff_abs_percent68),
                        ("95% Percentile", diff_abs_percent95),
                        ("99% Percentile", diff_abs_percent99))
        for label, values in spread_stats:
            if not others_close(values, 0):
                errors.append(prefix + label + " wrong")
            if not values[pos] > 0:
                errors.append(prefix + label + doy_suffix)

    check_stats(1, 3)
    check_stats(2, 4)

    assert not errors, "errors occurred:\n{}".format("\n".join(errors))