def create_dataframe_multi_index(inp_path, section='CURVES'):
    """
    Read an INP section into a DataFrame indexed on (Name, Type).

    Rows with only three tokens are assumed to omit the Type field; a
    None placeholder is inserted so every kept row has four values.

    :param inp_path: path to the INP file
    :param section: section name to extract (default 'CURVES')
    :return: pandas DataFrame with a (Name, Type) MultiIndex; empty
        DataFrame (with a warning) when the section is absent
    """
    # normalize the section name for lookup in the headers OrderedDict
    sect = remove_braces(section).upper()

    # every section header present in the file acts as an end-of-section flag
    headers = get_inp_sections_details(inp_path, include_brackets=False)
    if sect not in headers:
        warnings.warn(f'{sect} section not found in {inp_path}')
        return pd.DataFrame()

    # pull the raw text of the requested section out of the file
    start_string = format_inp_section_header(section)
    end_strings = [format_inp_section_header(h) for h in headers.keys()]
    s = extract_section_of_file(inp_path, start_string, end_strings)
    cols = headers[sect]['columns']

    rows = []
    for raw_line in StringIO(s).readlines():
        tokens = raw_line.strip().split()
        if len(tokens) == 3:
            # Type column omitted on this row; pad with a None placeholder
            tokens = [tokens[0], None, tokens[1], tokens[2]]
        if len(tokens) == 4:
            rows.append(tokens)

    return pd.DataFrame(data=rows, columns=cols).set_index(['Name', 'Type'])
def dataframe_from_inp(inp_path, section, additional_cols=None, quote_replace=' ', **kwargs):
    """
    Create a DataFrame from a section of an INP file.

    :param inp_path: path to the INP file
    :param section: section name, with or without surrounding brackets
    :param additional_cols: extra column names appended to the section's
        standard column list (used for special cases, e.g. build instructions)
    :param quote_replace: text substituted for empty double-quote pairs ("")
    :param kwargs: passed through to extract_section_of_file
    :return: pandas DataFrame; empty DataFrame (with a warning) when the
        section is absent from the file
    """
    # normalize the section name for lookup in the headers OrderedDict
    sect = remove_braces(section).upper()

    # every section header present in the file acts as an end-of-section flag
    headers = get_inp_sections_details(inp_path, include_brackets=False)
    if sect not in headers:
        warnings.warn(f'{sect} section not found in {inp_path}')
        return pd.DataFrame()

    # pull the raw text of the requested section out of the file
    start_string = format_inp_section_header(section)
    end_strings = [format_inp_section_header(h) for h in headers.keys()]
    s = extract_section_of_file(inp_path, start_string, end_strings, **kwargs)

    # empty double quotes confuse the parser; swap them for a placeholder
    s = s.replace('""', quote_replace)

    # assemble the column list, adding any caller-supplied extras
    extra_cols = additional_cols if additional_cols is not None else []
    cols = headers[sect]['columns'] + extra_cols

    if headers[sect]['columns'][0] == 'blob':
        # unstructured section: return whole rows without specific col headers
        return pd.read_csv(StringIO(s), delim_whitespace=False)

    try:
        return pd.read_csv(StringIO(s), header=None, delim_whitespace=True,
                           skiprows=[0], index_col=0, names=cols)
    except IndexError:
        print(
            f'failed to parse {section} with cols: {cols}. head:\n{s[:500]}'
        )
        raise
def test_format_inp_section_header():
    """format_inp_section_header should bracket and upper-case any input."""
    cases = [
        ('[CONDUITS]', '[CONDUITS]'),  # already canonical: unchanged
        ('[conduits]', '[CONDUITS]'),  # lower-case, bracketed
        ('JUNCTIONS', '[JUNCTIONS]'),  # upper-case, unbracketed
        ('pumps', '[PUMPS]'),          # lower-case, unbracketed
    ]
    for raw, expected in cases:
        assert format_inp_section_header(raw) == expected
def write_inp_section(file_object, allheaders, sectionheader, section_data, pad_top=True, na_fill=''):
    """
    given an open file object, list of header sections, the current section
    header, and the section data in a Pandas Dataframe format,
    this function writes the data to the file object.

    :param file_object: open, writable file object to receive the section text
    :param allheaders: mapping of known section headers to column details;
        may be falsy, which skips the 'blob' special-case check
    :param sectionheader: name of the section being written (normalized via
        format_inp_section_header before use)
    :param section_data: pandas DataFrame holding the section's rows; nothing
        is written for the section when it is empty
    :param pad_top: when True, prepend two blank lines before the header
    :param na_fill: value substituted for NaN cells in non-blob sections
        (blob sections always fill NaN with '')
    """
    f = file_object
    add_str = ''

    sectionheader = format_inp_section_header(sectionheader)
    if not section_data.empty:
        if pad_top:
            f.write('\n\n' + sectionheader + '\n')  # add SWMM-friendly header e.g. [DWF]
        else:
            f.write(sectionheader + '\n')
        if allheaders and (sectionheader in allheaders) and allheaders[sectionheader]['columns'] == ['blob']:
            # 'blob' sections hold one free-form column named after the section;
            # to left justify based on the longest string in the blob column
            formatter = '{{:<{}s}}'.format(
                section_data[sectionheader].str.len().max()).format
            add_str = section_data.fillna('').to_string(
                index_names=False,
                header=False,
                index=False,
                justify='left',
                formatters={sectionheader: formatter})
        else:
            # naming the columns to the index name so the it prints in-line with col headers
            f.write(';;')
            # to left justify on longest string in the Comment column
            # this is overly annoying, to deal with 'Objects' vs numbers to remove
            # two bytes added from the double semicolon header thing (to keep things lined up)
            objectformatter = {
                hedr: ' {{:<{}}}'.format(
                    section_data[hedr].apply(str).str.len().max()).format
                for hedr in section_data.columns
            }
            # NOTE(review): numformatter builds the same format strings as
            # objectformatter, so this update appears to be a no-op — confirm
            # whether numeric columns were meant to use a different format
            numformatter = {
                hedr: ' {{:<{}}}'.format(
                    section_data[hedr].apply(str).str.len().max()).format
                for hedr in section_data.columns
                if section_data[hedr].dtype != "O"
            }
            objectformatter.update(numformatter)
            add_str = section_data.fillna(na_fill).to_string(
                index_names=False,
                header=True,
                justify='left',
                formatters=objectformatter  # {'Comment':formatter}
            )

    # write the dataframe as a string
    # NOTE(review): formatting reconstructed from a whitespace-mangled source;
    # the add_str='' initializer implies this write runs even for empty
    # section_data (emitting only the trailing blank lines) — confirm
    f.write(add_str + '\n\n')
def get_inp_sections_details(inp_path, include_brackets=False):
    """
    creates a dictionary with all the headers found in an INP file
    (which varies based on what the user has defined in a given model)
    and updates them based on the definitions in inp_header_dict
    this ensures the list is comprehensive

    :param inp_path: path to the INP file
    :param include_brackets: whether to parse sections including the []
    :return: OrderedDict mapping section name -> details (including 'columns')

    >>> from swmmio.tests.data import MODEL_FULL_FEATURES_XY
    >>> headers = get_inp_sections_details(MODEL_FULL_FEATURES_XY)
    >>> [header for header, cols in headers.items()][:4]
    ['TITLE', 'OPTIONS', 'EVAPORATION', 'RAINGAGES']
    >>> headers['SUBCATCHMENTS']['columns']
    ['Name', 'Raingage', 'Outlet', 'Area', 'PercImperv', 'Width', 'PercSlope', 'CurbLength', 'SnowPack']
    """
    from swmmio.defs import INP_OBJECTS
    import pandas as pd
    found_sects = OrderedDict()

    with open(inp_path) as f:
        for line in f:
            sect_not_found = True
            for sect_id, data in INP_OBJECTS.items():
                # find the start of an INP section
                search_tag = format_inp_section_header(sect_id)
                if search_tag.lower() in line.lower():
                    if include_brackets:
                        sect_id = '[{}]'.format(sect_id.upper())
                    found_sects[sect_id.upper()] = data
                    sect_not_found = False
                    break
            if sect_not_found:
                # BUG FIX: was `if '[' and ']' in line:` — the constant '['
                # is always truthy, so only `']' in line` was ever tested.
                # Require BOTH brackets before treating the line as an
                # unrecognized section header.
                if '[' in line and ']' in line:
                    h = line.strip()
                    if not include_brackets:
                        h = h.replace('[', '').replace(']', '')
                    # unknown sections are kept verbatim as a single 'blob' column
                    found_sects[h] = OrderedDict(columns=['blob'])

    # make necessary adjustments to columns that change based on options
    ops_cols = INP_OBJECTS['OPTIONS']['columns']
    ops_string = extract_section_of_file(
        inp_path,
        '[OPTIONS]',
        INP_SECTION_TAGS,
    )
    options = pd.read_csv(StringIO(ops_string), header=None,
                          delim_whitespace=True, skiprows=[0],
                          index_col=0, names=ops_cols)

    if 'INFILTRATION' in found_sects:
        # select the correct infiltration column names;
        # fall back to HORTON if invalid/unset infil type
        infil_type = options['Value'].get('INFILTRATION', None)
        # BUG FIX: also fall back when the declared type is not a known
        # INFILTRATION_COLS key (previously only NaN/unset was handled,
        # so an invalid value raised KeyError below)
        if pd.isna(infil_type) or infil_type not in INFILTRATION_COLS:
            infil_type = 'HORTON'
        infil_cols = INFILTRATION_COLS[infil_type]

        inf_id = 'INFILTRATION'
        if include_brackets:
            inf_id = '[{}]'.format('INFILTRATION')

        # overwrite the dynamic sections with proper header cols
        found_sects[inf_id]['columns'] = list(infil_cols)

    return found_sects