Пример #1
0
def test_fetch_hitemp_all_molecules(molecule, verbose=False, *args, **kwargs):
    """Download the HITEMP database of ``molecule`` and check it is complete.

    .. warning::
        this downloads gigabytes of data. It is unselected by default by Pytest
        (see radis/setup.cfg)

    The test passes if the databank ``HITEMP-{molecule}`` is declared in the
    RADIS databank list after the download, and if the number of lines
    returned matches ``INFO_HITEMP_LINE_COUNT[molecule]``.

    The bz2 compression factor gives about 17 MB / million lines :

    - OH (57 k lines) is only 900 kb
    - CO (0.7 M lines) is only ~14 Mb
    - CH4 (114 M lines) is 435 MB

    If it fails, check the databases downloaded in ~/.radisdb

    Notes
    -----
    Performance tests of chunksize tested on CO :

    - chunksize=1000000 : total > 22s, 1 iteration ~ 22s
    - chunksize=100000 : total > 18s, 1 iteration ~ 4s
    - chunksize=50000 : total > 19s, 1 iteration ~ 2s
    - chunksize=1000 : total ~ 90s, 1 iteration << 1s
    """
    line_list = fetch_hitemp(molecule, verbose=verbose)

    # The download must have registered the databank in the RADIS config
    databank_name = f"HITEMP-{molecule}"
    declared_databanks = getDatabankList()
    assert databank_name in declared_databanks

    # ... and every expected line must have been parsed
    n_lines = len(line_list)
    assert n_lines == INFO_HITEMP_LINE_COUNT[molecule]
Пример #2
0
def test_fetch_hitemp_OH(verbose=True, *args, **kwargs):
    """Check that the HITEMP OH database downloads and parses properly.

    This is a good test of ``fetch_hitemp``: ``13_HITEMP2020.par.bz2`` is only
    900 kb, so it can be run on online tests without burning the planet 🌳

    ⚠️ With the default ``chunksize=100000`` the file is uncompressed in one
    pass (OH has only 57k lines), which would not test that appending to the
    same HDF5 file works. A smaller chunksize of 20,000 is therefore used here.
    """
    # Force a rebuild, in several chunks, with a high verbosity level
    line_list = fetch_hitemp(
        "OH", cache="regen", chunksize=20000, verbose=3 * verbose
    )

    declared_databanks = getDatabankList()
    assert "HITEMP-OH" in declared_databanks

    n_lines = len(line_list)
    assert n_lines == 57019

    # Second call reads the file just built: makes sure reloading works
    # (ex: metadata properly loaded etc.)
    fetch_hitemp("OH")
Пример #3
0
def build_test_databases(verbose=True):
    """Build test databases and add them in ~/.radis. Generate the file if it
    doesnt exist.

    Every entry of ``TEST_DATABASES`` is handled as follows:

    - if a databank of that name is already declared, its entries are compared
      with the expected ones and a ``ValueError`` is raised on mismatch
    - else the databank is added with ``addDatabankEntries``

    In particular:

    - HITRAN-CO2-TEST: CO2, HITRAN 2016, 4165-4200 nm
    - HITRAN-CO-TEST: CO, HITRAN 2016, 2000-2300 cm-1

    These test databases are used to run the different test routines. They can
    obviously be used by Users to run simulations, but we suggest Users to download
    their own line databases files and add them to ~/.radis so they have more control
    on it

    Parameters
    ----------
    verbose: bool
        if ``True``, print the name of each test database that is added.

    Raises
    ------
    ValueError
        if a test database is already declared in ~/.radis with entries that
        differ from the expected ones.
    """

    # Get list of databases (none declared if the config file is missing)
    try:
        dbnames = getDatabankList()
    except FileNotFoundError:
        dbnames = []

    # %% Add test databases

    for dbname, dbentries in TEST_DATABASES.items():

        if dbname not in dbnames:
            # add them (create ~/.radis file if doesnt exist yet)
            if verbose:
                print("Adding '{0}' database in ~/.radis".format(dbname))
            addDatabankEntries(dbname, dbentries)
            continue

        # Already declared: check entries are correct
        current_entries = getDatabankEntries(dbname)
        diff = diffDatabankEntries(current_entries, dbentries, verbose=False)
        if diff is not None:
            raise ValueError(
                "{diff}"
                "\nIn ~/.radis\n----------\n{current}"
                "\n\nExpected\n---------\n{expected}\n\n"
                "Test Database {name} doesnt match expected "
                "entries for key `{diff}`. See comparison above. "
                "To regenerate test databases just delete the {name} "
                "entry in your ~/.radis".format(
                    diff=diff,
                    current=current_entries,
                    expected=dbentries,
                    name=dbname,
                )
            )

    return
Пример #4
0
def setup_test_line_databases(verbose=True):
    """Build :py:data:`~radis.test.utils.TEST_DATABASES` and add them in ~/.radis.
    Generate the file if it doesnt exist.

    Every entry of ``TEST_DATABASES`` is handled as follows:

    - if a databank of that name is already declared, its entries are compared
      with the expected ones and a ``ValueError`` is raised on mismatch
    - else the databank is added with ``addDatabankEntries``

    In particular:

    - HITRAN-CO2-TEST: CO2, HITRAN 2016, 4165-4200 nm
    - HITRAN-CO-TEST: CO, HITRAN 2016, 2000-2300 cm-1
    - HITEMP-CO2-TEST: CO2, HITEMP-2010, 2283.7-2285.1 cm-1, 3 isotopes
    - HITEMP-CO2-HAMIL-TEST: same as previous, with (some) energy levels computed
      from Tashkun effective Hamiltonian.


    These test databases are used to run the different test routines. They can
    obviously be used by Users to run simulations, but we suggest Users to download
    their own line databases files and add them to ~/.radis so they have more control
    on it

    Parameters
    ----------
    verbose: bool
        if ``True``, print the name of each test database that is added.

    Raises
    ------
    ValueError
        if a test database is already declared in ~/.radis with entries that
        differ from the expected ones.

    Examples
    --------

    Initialize the Line databases::

        from radis import setup_test_line_databases
        setup_test_line_databases()

    Plot a CO2 spectrum at high temperature::

        from radis import calc_spectrum
        calc_spectrum(2284,
                      2285,
                      Tgas=2000,
                      pressure=1,
                      molecule='CO2',
                      isotope=1,
                      databank='HITEMP-CO2-TEST').plot()

    Note that 'HITEMP-CO2-TEST' is defined on 2283.7-2285.1 cm-1 only, as
    can be shown by reading the Database information::

        from radis.misc.config import printDatabankEntries
        printDatabankEntries('HITEMP-CO2-TEST')

        >>> HITEMP-CO2-TEST
        >>> -------
        >>> info : HITEMP-2010, CO2, 3 main isotope (CO2-626, 636, 628), 2283.7-2285.1 cm-1
        >>> path : ['/USER/PATH/TO\\radis\\radis\\test\\files\\cdsd_hitemp_09_fragment.txt']
        >>> format : cdsd-hitemp
        >>> parfuncfmt : hapi
        >>> levelsfmt : radis


    See Also
    --------

    :ref:`Configuration file <label_lbl_config_file>`,
    :py:func:`~radis.misc.config.getDatabankList`,
    :py:func:`~radis.misc.config.printDatabankEntries`

    """
    # TODO: generate large band databases for the main species (let's say CO2,
    # H2O and CH4) and main isotopes by fetching the HITRAN 2016 database.

    # Get list of databases (none declared if the config file is missing)
    try:
        dbnames = getDatabankList()
    except FileNotFoundError:
        dbnames = []

    # %% Add test databases

    for dbname, dbentries in TEST_DATABASES.items():

        if dbname not in dbnames:
            # add them (create ~/.radis file if doesnt exist yet)
            if verbose:
                print("Adding '{0}' database in ~/.radis".format(dbname))
            addDatabankEntries(dbname, dbentries)
            continue

        # Already declared: check entries are correct
        current_entries = getDatabankEntries(dbname)
        diff = diffDatabankEntries(current_entries, dbentries, verbose=False)
        if diff is not None:
            raise ValueError(
                "{diff}"
                "\nIn ~/.radis\n----------\n{current}"
                "\n\nExpected\n---------\n{expected}\n\n"
                "Test Database {name} doesnt match expected "
                "entries for key `{diff}`. See comparison above. "
                "To regenerate test databases just delete the {name} "
                "entry in your ~/.radis".format(
                    diff=diff,
                    current=current_entries,
                    expected=dbentries,
                    name=dbname,
                )
            )

    return
Пример #5
0
def setup_test_line_databases(verbose=True):
    """Build :py:data:`~radis.test.utils.TEST_DATABASES` and add them in ~/.radis.
    Generate the file if it doesnt exist.

    Every entry of ``TEST_DATABASES`` is handled as follows:

    - if a databank of that name is already declared, its entries are compared
      with the expected ones and a ``ValueError`` is raised on mismatch
    - else the databank is added with ``addDatabankEntries``

    In particular:

    - HITRAN-CO2-TEST: CO2, HITRAN 2016, 4165-4200 nm
    - HITRAN-CO-TEST: CO, HITRAN 2016, 2000-2300 cm-1
    - HITEMP-CO2-TEST: CO2, HITEMP-2010, 2283.7-2285.1 cm-1, 3 isotopes

    These test databases are used to run the different test routines. They can
    obviously be used by Users to run simulations, but we suggest Users to download
    their own line databases files and add them to ~/.radis so they have more control
    on it

    Parameters
    ----------
    verbose: bool
        if ``True``, print the name of each test database that is added.

    Raises
    ------
    ValueError
        if a test database is already declared in ~/.radis with entries that
        differ from the expected ones.

    See Also
    --------

    :ref:`Configuration file <label_lbl_config_file>`

    """
    # TODO: generate large band databases for the main species (let's say CO2,
    # H2O and CH4) and main isotopes by fetching the HITRAN 2016 database.

    # Get list of databases (none declared if the config file is missing)
    try:
        dbnames = getDatabankList()
    except FileNotFoundError:
        dbnames = []

    # %% Add test databases

    for dbname, dbentries in TEST_DATABASES.items():

        if dbname not in dbnames:
            # add them (create ~/.radis file if doesnt exist yet)
            if verbose:
                print("Adding '{0}' database in ~/.radis".format(dbname))
            addDatabankEntries(dbname, dbentries)
            continue

        # Already declared: check entries are correct
        current_entries = getDatabankEntries(dbname)
        diff = diffDatabankEntries(current_entries, dbentries, verbose=False)
        if diff is not None:
            raise ValueError(
                "{diff}"
                "\nIn ~/.radis\n----------\n{current}"
                "\n\nExpected\n---------\n{expected}\n\n"
                "Test Database {name} doesnt match expected "
                "entries for key `{diff}`. See comparison above. "
                "To regenerate test databases just delete the {name} "
                "entry in your ~/.radis".format(
                    diff=diff,
                    current=current_entries,
                    expected=dbentries,
                    name=dbname,
                )
            )

    return
Пример #6
0
def fetch_hitemp(
    molecule,
    local_databases="~/.radisdb/",
    databank_name="HITEMP-{molecule}",
    isotope=None,
    load_wavenum_min=None,
    load_wavenum_max=None,
    cache=True,
    verbose=True,
    chunksize=100000,
    clean_cache_files=True,
):
    """Stream HITEMP file from HITRAN website. Unzip and build a HDF5 file directly.

    Returns a Pandas DataFrame containing all lines.

    Parameters
    ----------
    molecule: `"CO2", "N2O", "CO", "CH4", "NO", "NO2", "OH"`
        HITEMP molecule. See :py:attr:`~radis.io.hitemp.HITEMP_SOURCE_FILES`.
        Note that ``"H2O"`` and ``"CO2"`` are currently rejected with a
        ``NotImplementedError`` (multiple source files).
    local_databases: str
        where to create the RADIS HDF5 files. Default ``"~/.radisdb/"``.
        A leading ``~`` is expanded to the user home directory.
    databank_name: str
        name of the databank in RADIS :ref:`Configuration file <label_lbl_config_file>`
        Default ``"HITEMP-{molecule}"``
    isotope: str
        load only certain isotopes : ``'2'``, ``'1,2'``, etc. If ``None``, loads
        everything. Default ``None``.
    load_wavenum_min, load_wavenum_max: float (cm-1)
        load only specific wavenumbers.

    Other Parameters
    ----------------
    cache: bool, or ``'regen'``
        if ``True``, use existing HDF5 file. If ``False`` or ``'regen'``, rebuild it.
    verbose: bool, or int
        if ``True``, print progress information. If ``>= 3``, also report the
        removal of the downloaded cache file.
    chunksize: int
        number of lines to process at a same time. Higher is usually faster
        but can create Memory problems and keep the user uninformed of the progress.
    clean_cache_files: bool
        if ``True`` clean downloaded cache files after HDF5 are created.

    Returns
    -------
    df: pd.DataFrame
        Line list
        A HDF5 file is also created in ``local_databases`` and referenced
        in the :ref:`RADIS config file <label_lbl_config_file>` with name
        ``databank_name``

    Raises
    ------
    NotImplementedError
        if ``molecule`` is ``"H2O"`` or ``"CO2"``.
    KeyError
        if ``molecule`` is not a key of ``HITEMP_SOURCE_FILES``.
    ValueError
        if the HDF5 file already exists but is not declared in the RADIS
        config file, and it cannot be registered automatically (unexpected
        number of lines, or missing metadata).
    AssertionError
        if the number of lines of the HDF5 file built differs from the number
        of lines parsed, or from the number of lines expected for this molecule.

    Notes
    -----
    if using ``load_wavenum_min``, ``load_wavenum_max`` or ``isotope``, the whole
    database is anyway downloaded and uncompressed to ``local_databases``
    fast access .HDF5 files (which will take a long time on first call). Only
    the expected wavenumber range & isotopes are returned. The .HDF5 parsing uses
    :py:func:`~radis.io.hdf5.hdf2df`

    See Also
    --------
    :py:func:`~radis.io.hdf5.hdf2df`

    """
    # TODO ? : unzip only parts of the database
    # see https://github.com/radis/radis/pull/194

    if databank_name == "HITEMP-{molecule}":
        databank_name = databank_name.format(**{"molecule": molecule})
    # expanduser() only expands a leading "~" : a "~" found elsewhere in the
    # path (ex: Windows short names such as "PROGRA~1") is left untouched
    local_databases = abspath(expanduser(local_databases))

    if molecule in ["H2O", "CO2"]:
        raise NotImplementedError(
            "Automatic HITEMP download not implemented for {0} : multiple files. Download manually on https://hitran.org/hitemp/ "
            .format(molecule))

    try:
        inputf = HITEMP_SOURCE_FILES[molecule]
    except KeyError as err:
        raise KeyError(
            f"Please choose one of HITEMP molecules : {list(HITEMP_SOURCE_FILES.keys())}. Got '{molecule}'"
        ) from err
    urlname = BASE_URL + inputf

    # Best-effort creation of the database folder : it usually exists already
    try:
        os.mkdir(local_databases)
    except OSError:
        pass
    else:
        if verbose:
            print("Created folder :", local_databases)

    output = abspath(
        join(local_databases,
             molecule + "-" + inputf.replace(".par.bz2", ".h5")))

    if not cache or cache == "regen":
        # Delete existing HDF5 file
        # TODO: also clean the getDatabankList? Todo once it is in JSON format. https://github.com/radis/radis/issues/167
        if exists(output):
            if verbose:
                print("Removing existing file ", output)
            os.remove(output)

    if exists(output):
        # check metadata :
        check_not_deprecated(
            output,
            metadata_is={},
            metadata_keys_contain=["wavenumber_min", "wavenumber_max"],
        )
        # check database is registered in ~/.radis
        if databank_name not in getDatabankList():
            # if not, check number of rows is correct :
            error_msg = ""
            with pd.HDFStore(output, "r") as store:
                nrows = store.get_storer("df").nrows
                if nrows != INFO_HITEMP_LINE_COUNT[molecule]:
                    error_msg += (
                        f"\nNumber of lines in local database ({nrows:,}) " +
                        "differ from the expected number of lines for " +
                        f"HITEMP {molecule}: {INFO_HITEMP_LINE_COUNT[molecule]}"
                    )
                # ... and that all metadata needed to register it are there :
                file_metadata = store.get_storer("df").attrs.metadata
                for k in [
                        "wavenumber_min",
                        "wavenumber_max",
                        "download_url",
                        "download_date",
                ]:
                    if k not in file_metadata:
                        error_msg += (
                            "\nMissing key in file metadata to register the database "
                            + f"automatically : {k}")

            if error_msg:
                # All fragments holding a placeholder are f-strings, so that
                # the molecule and the declared databanks are actually shown
                raise ValueError(
                    f"{databank_name} not declared in your RADIS ~/.config file although "
                    f"{output} exists. {error_msg}\n"
                    "If you know this file, add it to ~/.radisdb manually. "
                    "Else regenerate the database with:\n\t"
                    ">>> radis.SpectrumFactory().fetch_databank(..., use_cached='regen')"
                    "\nor\n\t"
                    f">>> radis.io.hitemp.fetch_hitemp({molecule!r}, cache='regen')"
                    "\n\n⚠️ It will re-download & uncompress the whole database "
                    f"from HITEMP.\n\nList of declared databanks: {getDatabankList()}.\n"
                    f"{output} metadata: {file_metadata}")

            # Else database looks ok : register it
            if verbose:
                print(
                    f"{databank_name} not declared in your RADIS ~/.config file although "
                    +
                    f"{output} exists. Registering the database automatically."
                )

            register_database(
                databank_name,
                [output],
                molecule=molecule,
                wmin=file_metadata["wavenumber_min"],
                wmax=file_metadata["wavenumber_max"],
                download_date=file_metadata["download_date"],
                urlname=file_metadata["download_url"],
                verbose=verbose,
            )

        if verbose:
            print(f"Using existing database {databank_name}")
        return hdf2df(
            output,
            isotope=isotope,
            load_wavenum_min=load_wavenum_min,
            load_wavenum_max=load_wavenum_max,
            verbose=verbose,
        )

    # Doesnt exist : download
    ds = DataSource(join(local_databases, "downloads"))

    if verbose:
        print(f"Downloading {inputf} for {molecule}.")
    download_date = date.today().strftime("%d %b %Y")

    columns = columns_2004

    # Get linereturn (depends on OS, but file may also have been generated
    # on a different OS. Here we simply read the file to find out)
    with ds.open(urlname) as gfile:  # locally downloaded file

        dt = _create_dtype(
            columns, "a2"
        )  # 'a2' allocates space to get \n or \n\r for linereturn character
        b = np.zeros(1, dtype=dt)
        gfile.readinto(b)
        linereturnformat = _get_linereturnformat(b, columns)

    with ds.open(urlname) as gfile:  # locally downloaded file

        dt = _create_dtype(columns, linereturnformat)
        b = np.zeros(chunksize,
                     dtype=dt)  # receives the HITRAN 160-character data.
        wmin = np.inf
        wmax = 0
        if verbose:
            print(
                f"Download complete. Building {molecule} database to {output}")

        with pd.HDFStore(output, mode="a", complib="blosc", complevel=9) as f:
            Nlines = 0
            Ntotal_lines_expected = INFO_HITEMP_LINE_COUNT[molecule]
            pb = ProgressBar(N=Ntotal_lines_expected, active=verbose)
            # Read chunk by chunk until readinto() returns 0 (nothing left)
            for nbytes in iter(lambda: gfile.readinto(b), 0):

                if not b[-1]:
                    # End of file flag within the chunk (but does not start
                    # with End of file flag) so nbytes != 0
                    b = get_last(b)

                df = _ndarray2df(b, columns, linereturnformat)

                # Append this chunk to the "df" table of the HDF5 file
                f.put(
                    key="df",
                    value=df,
                    append=True,
                    format="table",
                    data_columns=DATA_COLUMNS,
                )

                # Keep track of the wavenumber range parsed so far
                wmin = np.min((wmin, df.wav.min()))
                wmax = np.max((wmax, df.wav.max()))
                Nlines += len(df)
                pb.update(
                    Nlines,
                    message=
                    f"Parsed {Nlines:,} / {Ntotal_lines_expected:,} lines. Wavenumber range {wmin:.2f}-{wmax:.2f} cm-1 is complete.",
                )

                # Reinitialize for next read
                b = np.zeros(
                    chunksize,
                    dtype=dt)  # receives the HITRAN 160-character data.

            # Metadata are read back when an existing file is reused
            f.get_storer("df").attrs.metadata = {
                "wavenumber_min": wmin,
                "wavenumber_max": wmax,
                "download_date": download_date,
                "download_url": urlname,
                "version": radis.__version__,
            }
            pb.done()

    # Done: add final checks
    # ... check on the created file that all lines are there :
    with pd.HDFStore(output, "r") as store:
        nrows = store.get_storer("df").nrows
        assert nrows == Nlines
        if nrows != INFO_HITEMP_LINE_COUNT[molecule]:
            raise AssertionError(
                f"Number of lines in local database ({nrows:,}) " +
                "differ from the expected number of lines for " +
                f"HITEMP {molecule}: {INFO_HITEMP_LINE_COUNT[molecule]}" +
                ". Check that there was no recent update on HITEMP. " +
                "Else it may be a download error ?")

    # Add database to  ~/.radis
    register_database(databank_name, [output], molecule, wmin, wmax,
                      download_date, urlname, verbose)

    # Fully unzipped : clean
    if clean_cache_files:
        os.remove(ds._findfile(urlname))
        if verbose >= 3:
            from radis.misc.printer import printg

            printg("... removed downloaded cache file")

    return hdf2df(
        output,
        isotope=isotope,
        load_wavenum_min=load_wavenum_min,
        load_wavenum_max=load_wavenum_max,
        verbose=verbose,
    )