def get_label_hitran_locglob(row, details):
    ''' Build a text label for one line from its raw local/global quanta

    The first part of the label reads
    ``<molecule>[iso<iso>][<locl>](<globl>)->(<globu>)``. One extra line
    (separated by a newline character) is appended per entry of ``details``.

    Parameters
    ----------

    row: mapping-like (presumably a pandas Series -- TODO confirm)
        one line of the database. Must expose an ``id`` attribute, the
        ``'iso'``, ``'locl'``, ``'globl'``, ``'globu'`` keys, and every key
        of ``details``

    details: mapping
        ``{key: (name, _, unit)}``. The middle element is ignored here.

    Returns
    -------

    label: str

    Todo
    -------

    replace with simple astype(str) statements and str operations

    ex:
    > '['+df[locl].astype(str)+']('+df[globl].astype(str)+'->'+
    >     df[globu].astype(str)'+)'

    will be much faster!
    '''

    molecule = get_molecule(row.id)

    header = '{0}[iso{1}]'.format(molecule, row['iso'])
    quanta = '[{locl}]({globl})->({globu})'.format(
        locl=row['locl'], globl=row['globl'], globu=row['globu'])
    pieces = [header + quanta]

    # Add details about some line properties
    for key in details:
        name, _, unit = details[key]
        value = row[key]
        # Values accepted by is_float() are shortened to 3 significant
        # digits; anything else is written as is.
        if is_float(value):
            template = '\n{0} {1}: {2:.3g} {3}'
        else:
            template = '\n{0} {1}: {2} {3}'
        pieces.append(template.format(key, name, value, unit))

    return ''.join(pieces)
def get_label_hitran(row, details):
    ''' Build a text label for one line from its parsed HITRAN quanta

    The layout of the first line depends on which of ``HITRAN_CLASS1``,
    ``HITRAN_CLASS4`` or ``HITRAN_CLASS5`` the molecule belongs to. One extra
    line (separated by a newline character) is appended per entry of
    ``details``.

    Parameters
    ----------

    row: mapping-like (presumably a pandas Series -- TODO confirm)
        one line of the database. Must expose an ``id`` attribute, the
        ``'iso'``, ``'jl'``, ``'branch'`` keys, the vibrational quanta of the
        molecule class, and every key of ``details``

    details: mapping
        ``{key: (name, _, unit)}``. The middle element is ignored here.

    Returns
    -------

    label: str

    Raises
    ------

    NotImplementedError
        if the molecule is in none of the three classes above

    Todo
    -------

    replace with simple astype(str) statements and str operations

    ex:
    > '['+df[locl].astype(str)+']('+df[globl].astype(str)+'->'+
    >     df[globu].astype(str)'+)'

    will be much faster!
    '''

    molecule = get_molecule(row.id)

    # Get global labels: pick the template and the quanta it needs
    if molecule in HITRAN_CLASS1:
        template = (
            '{molec}[iso{iso:.0f}] [{branch}{jl:.0f}]({vl:.0f})->({vu:.0f})')
        quanta = ['vu', 'vl', 'jl', 'iso']
    elif molecule in HITRAN_CLASS4:
        template = (
            '{molec}[iso{iso:.0f}] [{branch}{jl:.0f}]'
            '({v1l:.0f}{v2l:.0f}`{l2l:.0f}`{v3l:.0f})'
            '->({v1u:.0f}{v2u:.0f}`{l2u:.0f}`{v3u:.0f})')
        quanta = ['v1u', 'v2u', 'l2u', 'v3u',
                  'v1l', 'v2l', 'l2l', 'v3l',
                  'jl', 'iso']
    elif molecule in HITRAN_CLASS5:
        template = (
            '{molec}[iso{iso:.0f}] [{branch}{jl:.0f}]'
            '({v1l:.0f}{v2l:.0f}`{l2l:.0f}`{v3l:.0f} {rl:.0f})'
            '->({v1u:.0f}{v2u:.0f}`{l2u:.0f}`{v3u:.0f} {ru:.0f})')
        quanta = ['v1u', 'v2u', 'l2u', 'v3u',
                  'v1l', 'v2l', 'l2l', 'v3l',
                  'rl', 'ru', 'jl', 'iso']
    else:
        raise NotImplementedError(
            'No label for {0}. Please add it!'.format(molecule))

    fields = {key: row[key] for key in quanta}
    fields['molec'] = molecule
    fields['branch'] = _fix_branch_format[row['branch']]
    pieces = [template.format(**fields)]

    # Add details about some line properties
    for key in details:
        name, _, unit = details[key]
        value = row[key]
        if is_float(value):
            line = '\n{0} {1}: {2:.3g} {3}'.format(key, name, value, unit)
        else:
            line = '\n{0} {1}: {2} {3}'.format(key, name, value, unit)
        pieces.append(line)

    return ''.join(pieces)
def get_label_hitran_branchjv(row, details):
    ''' Build a text label for one line from its branch, J and v1 quanta

    The first part of the label reads
    ``<molecule>[iso<iso>][<branch><jl>](<v1l>)->(<v1u>)``. One extra line
    (separated by a newline character) is appended per entry of ``details``.

    Parameters
    ----------

    row: mapping-like (presumably a pandas Series -- TODO confirm)
        one line of the database. Must expose an ``id`` attribute, the
        ``'iso'``, ``'branch'``, ``'jl'``, ``'v1l'``, ``'v1u'`` keys, and
        every key of ``details``

    details: mapping
        ``{key: (name, _, unit)}``. The middle element is ignored here.

    Returns
    -------

    label: str
    '''

    molecule = get_molecule(row.id)

    header = '{0}[iso{1}]'.format(molecule, row['iso'])
    transition = '[{branch}{jl}]({v1l})->({v1u})'.format(
        branch=row['branch'], jl=row['jl'], v1l=row['v1l'], v1u=row['v1u'])
    pieces = [header + transition]

    # Add details about some line properties
    for key in details:
        name, _, unit = details[key]
        value = row[key]
        # Values accepted by is_float() get 3 significant digits
        number_format = ':.3g' if is_float(value) else ''
        template = '\n{0} {1}: {2' + number_format + '} {3}'
        pieces.append(template.format(key, name, value, unit))

    return ''.join(pieces)
def __init__(self, name, verbose=True):  # @dev: no optional kwargs here (final stop)
    ''' Store the molecule name and its integer identifier

    Parameters
    ----------

    name: str, or int
        if a string, it is stored as ``self.name``. Any part enclosed in
        parentheses or square brackets is stripped before the identifier is
        looked up with ``get_molecule_identifier``; ``self.id`` is ``None``
        if that lookup raises ``KeyError``. If an integer, it is stored as
        ``self.id`` and the name is obtained from ``get_molecule``.

    verbose: boolean
        stored as ``self.verbose``

    Raises
    ------

    ValueError
        if ``name`` is neither a string nor an integer
    '''

    self.verbose = verbose

    # Get name and integer id
    if isinstance(name, string_types):
        self.name = name

        # Get name without parenthesis (without state) for HITRAN identification
        filtername = re.sub(r"[\(\[].*?[\)\]]", "", name)

        try:
            self.id = get_molecule_identifier(filtername)
        except KeyError:  # Not an HITRAN molecule
            self.id = None
    elif isinstance(name, int) and not isinstance(name, bool):
        # isinstance (rather than an exact type comparison) also accepts int
        # subclasses. bool is a subclass of int and is still rejected, as
        # it was before.
        self.id = name
        self.name = get_molecule(name)
    else:
        # Single formatted message: passing several arguments to ValueError
        # makes the error display as a tuple.
        raise ValueError('Wrong name type: {0!r} ({1})'.format(
            name, type(name).__name__))
def add_bands(df, dbformat, lvlformat, verbose=True):
    ''' Assign all transitions to a vibrational band:

    Add 'band', 'viblvl_l' and 'viblvl_u' attributes for each line to allow
    parsing the lines by band with::

        df0.groupby('band')

    For CO2 with one of the CDSD level formats, the HITRAN-convention names
    are additionally stored in 'band_htrn', 'viblvl_htrn_l' and
    'viblvl_htrn_u' when the 'v1l', 'v2l', 'l2l', 'v3l' columns are found
    in ``df``.

    Parameters
    ----------

    df: pandas Dataframe
        Line (transitions) database. Must hold the lines of one molecule only

    dbformat: one of :data:`~radis.lbl.loader.KNOWN_DBFORMAT`
        format of Line database

    lvlformat: one of ``KNOWN_LVLFORMAT``
        format of the level database. Handled here: ``'cdsd-pc'``,
        ``'cdsd-pcN'``, ``'cdsd-hamil'`` and ``'radis'`` for CO2;
        ``'radis'`` for molecules of ``HITRAN_CLASS1``

    verbose: boolean
        if ``True``, print progress and elapsed time

    Returns
    -------

    None
        input df is changed

    Raises
    ------

    ValueError
        if ``dbformat`` or ``lvlformat`` is unknown, or if ``df`` contains
        more than one molecule

    NotImplementedError
        if the molecule / ``lvlformat`` / ``dbformat`` combination is not
        handled

    Examples
    --------

    Add transitions in a Dataframe based on CDSD (p, c) format::

        add_bands(df, 'cdsd', 'cdsd-pc')

    Notes
    -----

    Performance with test case (CDSD CO2 2380-2400 cm-1):

    - Initial: with .apply()   8.08 s ± 95.2 ms
    - with groupby(): 9s   worse!!
    - using simple (and more readable) astype(str) statements: 523 ms ± 19.6 ms
    '''

    # Check inputs
    if dbformat not in KNOWN_DBFORMAT:
        raise ValueError('dbformat ({0}) should be one of: {1}'.format(
            dbformat, KNOWN_DBFORMAT))
    if lvlformat not in KNOWN_LVLFORMAT:
        raise ValueError('lvlformat ({0}) should be one of: {1}'.format(
            lvlformat, KNOWN_LVLFORMAT))

    if verbose:
        t0 = time()
        print('... sorting lines by vibrational bands')

    # Calculate bands: a single molecule is supported per call
    # (although it's an easy fix. Just groupby id)
    molecule_ids = list(pd.unique(df['id']))
    if len(molecule_ids) > 1:
        raise ValueError('Cant calculate vibrational bands for multiple ' +
                         'molecules yet')
    molecule = get_molecule(molecule_ids[0])

    if molecule == 'CO2':

        name_hitran_lvl = vib_lvl_name_hitran_class5

        if lvlformat in ['cdsd-pc', 'cdsd-pcN', 'cdsd-hamil']:

            # ensures that vib_lvl_name functions wont crash
            if dbformat not in ['cdsd', 'cdsd4000', 'hitran']:
                raise NotImplementedError(
                    'lvlformat {0} not supported with dbformat {1}'.format(
                        lvlformat, dbformat))

            # Use vibrational nomenclature of CDSD (p,c,j,n) or HITRAN
            # (v1v2l2v3J) depending on the Level Database.
            # In both cases, store the other one.

            # ... note: vib level in a CDSD (p,c,j,n) database is ambiguous.
            # ... a vibrational energy Evib can have been defined for every
            # (p, c) group:
            if lvlformat == 'cdsd-pc':
                name_cdsd_lvl = vib_lvl_name_cdsd_pc
                quanta = ('poly', 'wang')
            # ... or for every (p, c, N) group:
            elif lvlformat == 'cdsd-pcN':
                name_cdsd_lvl = vib_lvl_name_cdsd_pcN
                quanta = ('poly', 'wang', 'rank')
            # ... or for every level (p, c, J ,N) (that's the case if
            # coupling terms are taken into account... it also takes a much
            # longer time to look up vibrational energies in the
            # LineDatabase, warning!):
            elif lvlformat == 'cdsd-hamil':
                name_cdsd_lvl = vib_lvl_name_cdsd_pcJN
                quanta = ('poly', 'wang', 'j', 'rank')
            else:
                raise ValueError(
                    'Unexpected level format: {0}'.format(lvlformat))

            # Columns are named <quantum>l for the lower level and
            # <quantum>u for the upper level
            lower_cdsd = name_cdsd_lvl(
                *[getattr(df, q + 'l') for q in quanta])
            upper_cdsd = name_cdsd_lvl(
                *[getattr(df, q + 'u') for q in quanta])

            df.loc[:, 'viblvl_l'] = lower_cdsd
            df.loc[:, 'viblvl_u'] = upper_cdsd
            df.loc[:, 'band'] = lower_cdsd + '->' + upper_cdsd

            # Calculate HITRAN format too (to store them)
            if all_in(['v1l', 'v2l', 'l2l', 'v3l'], df):
                lower_hitran = name_hitran_lvl(
                    df.v1l, df.v2l, df.l2l, df.v3l)
                upper_hitran = name_hitran_lvl(
                    df.v1u, df.v2u, df.l2u, df.v3u)

                df.loc[:, 'viblvl_htrn_l'] = lower_hitran
                df.loc[:, 'viblvl_htrn_u'] = upper_hitran
                df.loc[:, 'band_htrn'] = lower_hitran + '->' + upper_hitran

        # 'radis' uses Dunham development based on v1v2l2v3 HITRAN convention
        elif lvlformat in ['radis']:

            if dbformat not in ['hitran', 'cdsd']:
                raise NotImplementedError(
                    'lvlformat `{0}` not supported with dbformat `{1}`'.format(
                        lvlformat, dbformat))

            # Calculate bands with HITRAN convention
            lower_hitran = name_hitran_lvl(df.v1l, df.v2l, df.l2l, df.v3l)
            upper_hitran = name_hitran_lvl(df.v1u, df.v2u, df.l2u, df.v3u)

            df.loc[:, 'viblvl_l'] = lower_hitran
            df.loc[:, 'viblvl_u'] = upper_hitran
            df.loc[:, 'band'] = lower_hitran + '->' + upper_hitran

        else:
            raise NotImplementedError(
                'Cant deal with lvlformat={0} for {1}'.format(
                    lvlformat, molecule))

    elif molecule in HITRAN_CLASS1:  # includes 'CO'
        # Note. TODO. Move that in loader.py (or somewhere consistent with
        # classes defined in cdsd.py / hitran.py)

        if lvlformat not in ['radis']:
            raise NotImplementedError(
                'Lvlformat not defined for {0}: {1}'.format(
                    molecule, lvlformat))

        # ensures that vib_lvl_name functions wont crash
        if dbformat not in ['hitran']:
            raise NotImplementedError(
                'lvlformat {0} not supported with dbformat {1}'.format(
                    lvlformat, dbformat))

        name_lvl = vib_lvl_name_hitran_class1

        df.loc[:, 'viblvl_l'] = name_lvl(df['vl'])
        df.loc[:, 'viblvl_u'] = name_lvl(df['vu'])
        df.loc[:, 'band'] = df['viblvl_l'] + '->' + df['viblvl_u']

    else:
        raise NotImplementedError(
            'Vibrational bands not yet defined for molecule: ' +
            '{0} with database format: {1}. '.format(molecule, dbformat) +
            'Update add_bands()')

    if verbose:
        print(('... lines sorted in {0:.1f}s'.format(time() - t0)))

    return None
def fetch_astroquery(molecule, isotope, wmin, wmax, verbose=True):
    ''' Wrapper to Astroquery [1]_ fetch function to download a line database

    The HITRAN server is queried through ``Hitran.query_lines_async``, the
    response is parsed to a DataFrame, and its columns are renamed to RADIS
    names. A 404 response whose reason is ``'Not Found'`` is interpreted as
    "no lines in this range" and yields an empty DataFrame.

    Notes
    -----

    Astroquery [1]_ is itself based on [HAPI]_

    Parameters
    ----------

    molecule: str, or int
        molecule name or identifier

    isotope: int
        isotope number

    wmin, wmax: float  (cm-1)
        wavenumber min and max

    Other Parameters
    ----------------

    verbose: boolean
        Default ``True``

    Returns
    -------

    df: pandas DataFrame
        lines, with RADIS column names. Columns ``wav``, ``int``, ``A``,
        ``airbrd``, ``selbrd``, ``El``, ``Tdpair``, ``Pshft`` are cast to
        float64

    Raises
    ------

    ValueError
        on a 404 response whose reason is not ``'Not Found'``

    References
    ----------

    .. [1] `Astroquery <https://astroquery.readthedocs.io>`_

    See Also
    --------

    :func:`astroquery.hitran.reader.download_hitran`,
    :func:`astroquery.hitran.reader.read_hitran_file`

    '''

    # Check input: accept either the molecule name or its identifier
    if is_float(molecule):
        mol_id = molecule
        molecule = get_molecule(mol_id)
    else:
        mol_id = get_molecule_identifier(molecule)
    assert is_float(isotope)

    # Download using the astroquery library
    response = Hitran.query_lines_async(molecule_number=mol_id,
                                        isotopologue_number=isotope,
                                        min_frequency=wmin / u.cm,
                                        max_frequency=wmax / u.cm)

    no_lines = False
    if response.status_code == 404:
        # Maybe there are just no lines for this species in this range
        # In that case we usually end up with errors like:
        # (<class 'Exception'>, Exception('Query failed: 404 Client Error:
        # Not Found for url: http://hitran.org/lbl/api?numax=25000&numin=19000&iso_ids_list=69\n',),
        # <traceback object at 0x7f0967c91708>)
        if response.reason != 'Not Found':
            raise ValueError(
                'An error occured during the download of HITRAN files ' +
                'for {0} (id={1}), iso={2} between {3:.2f}-{4:.2f}cm-1. '.
                format(molecule, mol_id, isotope, wmin, wmax) +
                'Are you online?\n' +
                'See details of the error below:\n\n {0}'.format(
                    response.reason))

        # Let's bet it's just that there are no lines in this range
        no_lines = True
        if verbose:
            print((
                'No lines for {0} (id={1}), iso={2} in range {3:.2f}-{4:.2f}cm-1. '
                .format(molecule, mol_id, isotope, wmin, wmax)))

    # Rename columns from Astroquery to RADIS format
    rename_columns = {
        'molec_id': 'id',
        'local_iso_id': 'iso',
        'nu': 'wav',
        'sw': 'int',
        'a': 'A',
        'gamma_air': 'airbrd',
        'gamma_self': 'selbrd',
        'elower': 'El',
        'n_air': 'Tdpair',
        'delta_air': 'Pshft',
        'global_upper_quanta': 'globu',
        'global_lower_quanta': 'globl',
        'local_upper_quanta': 'locu',
        'local_lower_quanta': 'locl',
        'line_mixing_flag': 'lmix',
        'gp': 'gp',
        'gpp': 'gpp',
    }

    if no_lines:
        df = pd.DataFrame(columns=list(rename_columns.values()))
    else:
        # Note: as of 0.9.16 we're not fixing astroquery_file_format anymore.
        # maybe we should.
        parsed = Hitran._parse_result(response)
        df = parsed.to_pandas().rename(columns=rename_columns)

    # Cast type to float64
    float_columns = ('wav', 'int', 'A', 'airbrd', 'selbrd', 'El', 'Tdpair',
                     'Pshft')
    for column in float_columns:
        df[column] = df[column].astype(np.float64)

    return df
def fetch_astroquery(molecule, isotope, wmin, wmax, verbose=True):
    ''' Wrapper to Astroquery [1]_ fetch function to download a line database

    The lines are downloaded with ``download_hitran``, read back with
    ``read_hitran_file`` from ``<cache_location>/<molecule>.data``, and the
    columns are renamed to RADIS names. A download error whose message
    contains ``'Not Found for url:'`` is interpreted as "no lines in this
    range" and yields an empty DataFrame.

    Notes
    -----

    Astroquery [1]_ is itself based on HAPI [2]_

    Parameters
    ----------

    molecule: str, or int
        molecule name or identifier

    isotope: int
        isotope number

    wmin, wmax: float  (cm-1)
        wavenumber min and max

    Other Parameters
    ----------------

    verbose: boolean
        Default True

    Returns
    -------

    df: pandas DataFrame
        lines, with RADIS column names. Columns ``wav``, ``int``, ``A``,
        ``airbrd``, ``selbrd``, ``El``, ``Tdpair``, ``Pshft`` are cast to
        float64

    Raises
    ------

    ValueError
        if the download fails for any reason other than the "Not Found" case
        above. The original error is chained as ``__cause__``.

    References
    ----------

    .. [1] `Astroquery <https://astroquery.readthedocs.io>`_

    .. [2] `HAPI: The HITRAN Application Programming Interface <http://hitran.org/hapi>`_

    '''

    # Check input
    if not is_float(molecule):
        mol_id = get_molecule_identifier(molecule)
    else:
        mol_id = molecule
        molecule = get_molecule(mol_id)
    assert is_float(isotope)

    empty_range = False

    # Download using the astroquery library
    try:
        download_hitran(mol_id, isotope, wmin, wmax)
    except Exception as err:
        # ``Exception`` rather than a bare ``except``, so KeyboardInterrupt
        # and SystemExit propagate instead of being turned into ValueError.
        #
        # Maybe there are just no lines for this species in this range
        # In that case we usually end up with errors like:
        # (<class 'Exception'>, Exception('Query failed: 404 Client Error:
        # Not Found for url: http://hitran.org/lbl/api?numax=25000&numin=19000&iso_ids_list=69\n',),
        # <traceback object at 0x7f0967c91708>)
        if 'Not Found for url:' in str(err):
            # Let's bet it's just that there are no lines in this range
            empty_range = True
            if verbose:
                print((
                    'Not lines for {0} (id={1}), iso={2} between {3:.2f}-{4:.2f}cm-1. '
                    .format(molecule, mol_id, isotope, wmin, wmax)))
        else:
            raise ValueError(
                'An error occured during the download of HITRAN files ' +
                'for {0} (id={1}), iso={2} between {3:.2f}-{4:.2f}cm-1. '.format(
                    molecule, mol_id, isotope, wmin, wmax) +
                'See details of the error below: {0}'.format(err)) from err

    # Rename columns from Astroquery to RADIS format
    rename_columns = {
        'molec_id': 'id',
        'local_iso_id': 'iso',
        'nu': 'wav',
        'sw': 'int',
        'a': 'A',
        'gamma_air': 'airbrd',
        'gamma_self': 'selbrd',
        'elower': 'El',
        'n_air': 'Tdpair',
        'delta_air': 'Pshft',
        'global_upper_quanta': 'globu',
        'global_lower_quanta': 'globl',
        'local_upper_quanta': 'locu',
        'local_lower_quanta': 'locl',
        'line_mixing_flag': 'lmix',
        'gp': 'gp',
        'gpp': 'gpp',
    }

    if not empty_range:
        tbl = read_hitran_file(join(cache_location, molecule + '.data'))
        df = tbl.to_pandas()
        df = df.rename(columns=rename_columns)
    else:
        df = pd.DataFrame(columns=list(rename_columns.values()))

    # Cast type to float64
    cast_type = {
        'wav': np.float64,
        'int': np.float64,
        'A': np.float64,
        'airbrd': np.float64,
        'selbrd': np.float64,
        'El': np.float64,
        'Tdpair': np.float64,
        'Pshft': np.float64,
    }
    for c, typ in cast_type.items():
        df[c] = df[c].astype(typ)

    return df
def fetch_astroquery(molecule, isotope, wmin, wmax, verbose=True, cache=True,
                     metadata=None):
    ''' Wrapper to Astroquery [1]_ fetch function to download a line database

    Notes
    -----

    Astroquery [1]_ is itself based on [HAPI]_

    Parameters
    ----------

    molecule: str, or int
        molecule name or identifier

    isotope: int
        isotope number

    wmin, wmax: float  (cm-1)
        wavenumber min and max

    Other Parameters
    ----------------

    verbose: boolean
        Default ``True``

    cache: boolean
        if ``True``, tries to find a ``.h5`` cache file in the Astroquery
        :py:attr:`~astroquery.query.BaseQuery.cache_location`, that would match
        the requirements. If not found, downloads it and saves the line dataframe
        as a ``.h5`` file in the Astroquery.

    metadata: dict, or ``None``
        if ``cache=True``, check that the metadata in the cache file correspond
        to these attributes. Arguments ``molecule``, ``isotope``, ``wmin``, ``wmax``
        are already added by default. The dictionary given by the caller is
        copied and never modified. ``None`` (default) means no extra
        attribute.

    Returns
    -------

    df: pandas DataFrame
        lines, with RADIS column names. When read from the cache file, it is
        whatever ``get_cache_file`` returns.

    Raises
    ------

    KeyError
        if Astroquery raises a ``KeyError`` during the query

    ValueError
        on a 404 response whose reason is not ``'Not Found'``, or on a 500
        response

    References
    ----------

    .. [1] `Astroquery <https://astroquery.readthedocs.io>`_

    See Also
    --------

    :py:func:`astroquery.hitran.reader.download_hitran`,
    :py:func:`astroquery.hitran.reader.read_hitran_file`,
    :py:attr:`~astroquery.query.BaseQuery.cache_location`

    '''

    # Check input
    if not is_float(molecule):
        mol_id = get_molecule_identifier(molecule)
    else:
        mol_id = molecule
        molecule = get_molecule(mol_id)
    assert is_float(isotope)

    empty_range = False

    # Work on a private copy: a shared ``{}`` default (or the caller's own
    # dict) would otherwise be modified by the update below and carry the
    # query parameters of one call over to the next.
    metadata = {} if metadata is None else dict(metadata)

    # If cache, tries to find from Astroquery:
    if cache:
        # Update metadata with physical properties from the database.
        query = {
            'molecule': molecule,
            'isotope': isotope,
            'wmin': wmin,
            'wmax': wmax
        }
        metadata.update(query)

        fcache = join(Hitran.cache_location, CACHE_FILE_NAME.format(**query))
        check_cache_file(fcache=fcache,
                         use_cached=cache,
                         metadata=metadata,
                         verbose=verbose)
        if exists(fcache):
            try:
                return get_cache_file(fcache, verbose=verbose)
            except Exception as err:
                # Unreadable cache file: delete it and download again below
                if verbose:
                    printr(
                        'Problem reading cache file {0}:\n{1}\nDeleting it!'.
                        format(fcache, str(err)))
                os.remove(fcache)

    # Download using the astroquery library
    try:
        response = Hitran.query_lines_async(molecule_number=mol_id,
                                            isotopologue_number=isotope,
                                            min_frequency=wmin / u.cm,
                                            max_frequency=wmax / u.cm)
    except KeyError as err:
        raise KeyError(str(err)+' <<w this error occured in Astroquery. Maybe these molecule '+\
                       '({0}) and isotope ({1}) are not supported'.format(molecule, isotope)) from err

    # Deal with usual errors
    if response.status_code == 404:
        # Maybe there are just no lines for this species in this range
        # In that case we usually end up with errors like:
        # (<class 'Exception'>, Exception('Query failed: 404 Client Error:
        # Not Found for url: http://hitran.org/lbl/api?numax=25000&numin=19000&iso_ids_list=69\n',),
        # <traceback object at 0x7f0967c91708>)
        if response.reason == 'Not Found':
            # Let's bet it's just that there are no lines in this range
            empty_range = True
            if verbose:
                print((
                    'No lines for {0} (id={1}), iso={2} in range {3:.2f}-{4:.2f}cm-1. '
                    .format(molecule, mol_id, isotope, wmin, wmax)))
        else:
            raise ValueError(
                'An error occured during the download of HITRAN files ' +
                'for {0} (id={1}), iso={2} between {3:.2f}-{4:.2f}cm-1. '.
                format(molecule, mol_id, isotope, wmin, wmax) +
                'Are you online?\n' +
                'See details of the error below:\n\n {0}'.format(
                    response.reason))
    elif response.status_code == 500:
        raise ValueError('{0} while querying the HITRAN server: '.format(response.status_code)+\
                         '\n\n{0}'.format(response.text))

    # Process response

    # Rename columns from Astroquery to RADIS format
    rename_columns = {
        'molec_id': 'id',
        'local_iso_id': 'iso',
        'nu': 'wav',
        'sw': 'int',
        'a': 'A',
        'gamma_air': 'airbrd',
        'gamma_self': 'selbrd',
        'elower': 'El',
        'n_air': 'Tdpair',
        'delta_air': 'Pshft',
        'global_upper_quanta': 'globu',
        'global_lower_quanta': 'globl',
        'local_upper_quanta': 'locu',
        'local_lower_quanta': 'locl',
        'line_mixing_flag': 'lmix',
        'gp': 'gp',
        'gpp': 'gpp',
    }

    if not empty_range:
        # Note: as of 0.9.16 we're not fixing astroquery_file_format anymore.
        # maybe we should.
        tbl = Hitran._parse_result(response)
        df = tbl.to_pandas()
        df = df.rename(columns=rename_columns)
    else:
        df = pd.DataFrame(columns=list(rename_columns.values()))

    # Cast type to float64
    cast_type = {
        'wav': np.float64,
        'int': np.float64,
        'A': np.float64,
        'airbrd': np.float64,
        'selbrd': np.float64,
        'El': np.float64,
        'Tdpair': np.float64,
        'Pshft': np.float64,
    }
    for c, typ in cast_type.items():
        df[c] = df[c].astype(typ)

    # cached file mode but cached file doesn't exist yet (else we had returned)
    if cache:
        if verbose:
            print('Generating cached file: {0}'.format(fcache))
        try:
            save_to_hdf(df,
                        fcache,
                        metadata=metadata,
                        version=radis.__version__,
                        key='df',
                        overwrite=True,
                        verbose=verbose)
        except Exception:
            # Writing the cache is best effort: the downloaded lines are
            # still returned. ``Exception`` rather than a bare ``except`` so
            # that KeyboardInterrupt / SystemExit are not swallowed.
            if verbose:
                print(sys.exc_info())
                print(
                    'An error occured in cache file generation. Lookup access rights'
                )

    return df
def get_label_hitran(row, details):
    """Build an HTML-style label for one line from its parsed HITRAN quanta

    The layout of the first line depends on which of ``HITRAN_CLASS1``,
    ``HITRAN_CLASS4`` or ``HITRAN_CLASS5`` the molecule belongs to. One extra
    line (separated by ``<br>``) is appended per entry of ``details``.

    Parameters
    ----------

    row: mapping-like (presumably a pandas Series -- TODO confirm)
        one line of the database. Must expose an ``id`` attribute, the
        ``"iso"``, ``"jl"``, ``"branch"`` keys, the vibrational quanta of the
        molecule class, and every key of ``details``

    details: mapping
        ``{key: (name, _, unit)}``. The middle element is ignored here.

    Returns
    -------

    label: str

    Raises
    ------

    NotImplementedError
        if the molecule is in none of the three classes above

    Todo
    -------

    replace with simple astype(str) statements and str operations

    ex:
    > '['+df[locl].astype(str)+']('+df[globl].astype(str)+'->'+
    >     df[globu].astype(str)'+)'

    will be much faster!
    """

    molecule = get_molecule(row.id)

    # Get global labels: pick the template and the quanta it needs
    if molecule in HITRAN_CLASS1:
        template = (
            "{molec}[iso{iso:.0f}] [{branch}{jl:.0f}]({vl:.0f})->({vu:.0f})")
        quanta = ["vu", "vl", "jl", "iso"]
    elif molecule in HITRAN_CLASS4:
        template = (
            "{molec}[iso{iso:.0f}] [{branch}{jl:.0f}]"
            "({v1l:.0f}{v2l:.0f}`{l2l:.0f}`{v3l:.0f})"
            "->({v1u:.0f}{v2u:.0f}`{l2u:.0f}`{v3u:.0f})")
        quanta = ["v1u", "v2u", "l2u", "v3u",
                  "v1l", "v2l", "l2l", "v3l",
                  "jl", "iso"]
    elif molecule in HITRAN_CLASS5:
        template = (
            "{molec}[iso{iso:.0f}] [{branch}{jl:.0f}]"
            "({v1l:.0f}{v2l:.0f}`{l2l:.0f}`{v3l:.0f} {rl:.0f})"
            "->({v1u:.0f}{v2u:.0f}`{l2u:.0f}`{v3u:.0f} {ru:.0f})")
        quanta = ["v1u", "v2u", "l2u", "v3u",
                  "v1l", "v2l", "l2l", "v3l",
                  "rl", "ru", "jl", "iso"]
    else:
        raise NotImplementedError(
            "No label for {0}. Please add it!".format(molecule))

    fields = {key: row[key] for key in quanta}
    fields["molec"] = molecule
    fields["branch"] = _fix_branch_format[row["branch"]]
    pieces = [template.format(**fields)]

    # Add details about some line properties
    for key in details:
        name, _, unit = details[key]
        value = row[key]
        if is_float(value):
            line = "<br>{0} {1}: {2:.3g} {3}".format(key, name, value, unit)
        else:
            line = "<br>{0} {1}: {2} {3}".format(key, name, value, unit)
        pieces.append(line)

    return "".join(pieces)