def setupRowCollection():
    """Build the empty RowDataCollection that file data is loaded into.

    The collection holds twelve value columns keyed 0 to 11 followed by an
    IntData object keyed 'row_no'. Keys 0 to 8 are StringData and keys 9 to
    11 are FloatData (3 decimal places, default 0.00).

    Return:
        RowDataCollection: the configured collection.
    """
    # One flag per column for keys 1 to 11: 0 builds a StringData object,
    # anything else builds a FloatData object.
    column_kinds = (0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1)

    collection = RowDataCollection()

    # The leading column is the only one formatted without a comma in front
    # of it, so it is created on its own.
    collection.addToCollection(do.StringData(0, format_str='{0}', default=''))

    for key, kind in enumerate(column_kinds, 1):
        if kind != 0:
            data_obj = do.FloatData(key, format_str=', {0}', no_of_dps=3,
                                    default=0.00)
        else:
            data_obj = do.StringData(key, format_str=', {0}', default='')
        collection.addToCollection(data_obj)

    # Extra column used for tracking the row position of the data in the file.
    collection.addToCollection(do.IntData('row_no'))

    return collection
def readTmfFile(datafile):
    """Load the contents of the TMF file referenced by datafile.

    The file is read with _loadFileFromDisc() and processed line by line.
    Lines that hasCommentOnlyLine() reports as comment-only (or returns an
    empty string for) are recorded in comment_lines; every other line is
    passed to the module level _loadRowData() to be added to the
    :class:'rowdatacollection'.

    Args:
        datafile(TuflowFile): TuflowFile object with file details.

    Return:
        tuple: rowdatacollection, comment_lines(list). comment_lines has one
            entry per line read: the value returned by hasCommentOnlyLine()
            for comment lines and None for data lines.

    See Also:
        :class:'rowdatacollection'.
    """
    separator = ','
    comment_markers = ['#', '!']
    enum = dataobj.TmfEnum()
    file_path = datafile.absolutePath()
    column_keys = range(11)

    # Column 0 is an IntData object, columns 1 to 10 are FloatData objects.
    collection = RowDataCollection()
    collection.addToCollection(do.IntData(0, format_str=None, default=''))
    for key in range(1, 11):
        float_col = do.FloatData(key, format_str=', {0}', default='',
                                 no_of_dps=3)
        collection.addToCollection(float_col)

    # Trailing comments and row numbers are tracked alongside the values.
    collection.addToCollection(
        do.StringData('comment', format_str=' ! {0}', default=''))
    collection.addToCollection(
        do.IntData('row_no', format_str=None, default=''))

    logger.info('Loading data file contents from disc - %s' % (file_path))
    file_lines = _loadFileFromDisc(file_path)

    comment_lines = []
    data_rows_read = 0
    for line in file_lines:
        comment = hasCommentOnlyLine(line, comment_markers)

        # An empty string result is treated the same as a comment.
        if comment or comment == '':
            comment_lines.append(comment)
            continue

        # Anything else is a data entry. None keeps comment_lines in step
        # with the lines of the file.
        comment_lines.append(None)
        collection = _loadRowData(line, data_rows_read, collection,
                                  enum.ITERABLE, comment_markers, separator)
        data_rows_read += 1

    # Loading the values will have set has_changed to True on the value
    # columns, so put it back to False now that the load is complete.
    for key in column_keys:
        collection.getDataObject(key).has_changed = False

    return collection, comment_lines
def readMatSubfile(main_datafile, filename, header_list, args_dict):
    """Load a materials subfile into a DataFileSubfileMat object.

    The file is first scanned to find which columns contain numeric data
    and on which row that data starts. It is then read row by row: comment
    only lines are stored in comment_lines, the first non-comment line is
    treated as the header row and all other lines are loaded as data.

    Args:
        main_datafile: object with a 'root' attribute; filename is joined
            to this to build the path of the subfile.
        filename(str): name of the subfile.
        header_list(list): up to two column header names to find in the
            header row of the subfile. Missing entries default to 'None'.
        args_dict: not used by this function.

    Return:
        DataFileSubfileMat: built from the path, the loaded
            rowdatacollection, the comment lines and the column indices of
            the two requested headers (-1 when a header was not found or
            the file contains no header row).

    Raises:
        IOError: if the file cannot be loaded.
    """
    comment_types = ['#', '!']
    root = main_datafile.root
    path = os.path.join(root, filename)

    header1 = header_list[0] if len(header_list) > 0 else 'None'
    header2 = header_list[1] if len(header_list) > 1 else 'None'

    def _scanfile(filepath):
        """Scans the file before we do any loading to identify the contents.

        Need to do this because the file can be setup in so many way that it
        becomes a headache to work it out in advance. Better to take a little
        bit of extra processing time and do some quick checks first.

        The first row read is taken as the header row. Scanning stops at the
        first row that is empty once its columns are joined and stripped.

        Arguments:
            filepath (str): the path to the subfile.

        Return:
            tuple:
                 list: booleans with whether the column contains data that we
                     want or not.
                 int:  length of the cols list.
                 list: containing all of the first row column data
                 int:  first row with usable data on (-1 if none was found).
        """
        logger.debug('Scanning Materials file - %s' % (filepath))

        # NOTE(review): binary mode with csv.reader is the Python 2
        # convention - confirm before running under Python 3.
        with open(filepath, 'rb') as csv_file:
            csv_file = csv.reader(csv_file)

            cols = []
            head_list = []
            start_row = -1
            for i, row in enumerate(csv_file, 0):
                if "".join(row).strip() == "":
                    break

                for j, col in enumerate(row, 0):
                    if i == 0:
                        # Every header column starts out as non-numeric.
                        cols.append(False)
                        head_list = row
                    elif uuf.isNumeric(col):
                        cols[j] = True
                        if start_row == -1:
                            start_row = i
                    elif cols[j] == True:
                        # Non-numeric value in a column already flagged as
                        # numeric: stop checking the rest of this row.
                        break

        return cols, len(cols), head_list, start_row

    def _loadHeadData(row, row_collection, col_length):
        """Loads the header row and finds the requested header columns.

        Each header, up to col_length, is stripped and added to the
        'actual_header' data object. A None placeholder is appended to the
        enclosing comment_lines list for the header line.

        Args:
            row(list): containing the header row data.
            row_collection(rowdatacollection): for updating.
            col_length(int): the number of columns to read.

        Return:
            tuple: rowdatacollection, index of header1, index of header2.
                An index is -1 if the header was not found.
        """
        comment_lines.append(None)

        head1_location = -1
        head2_location = -1
        for i in range(min(col_length, len(row))):
            entry = row[i].strip()
            if entry == header1:
                head1_location = i
            if entry == header2:
                head2_location = i
            row_collection._addValue('actual_header', entry)

        return row_collection, head1_location, head2_location

    def _loadRowData(row, row_count, row_collection, comment_lines,
                     col_length, start_row):
        """Loads the data in a specific row of the file.

        Args:
            row(list): containing the row data.
            row_count(int): the current row number.
            row_collection(rowdatacollection): for updating.
            comment_lines(list): comment lines found so far.
            col_length(int): the number of columns to read.
            start_row(int): the first row that contains actual data.

        Return:
            tuple: rowdatacollection, comment_lines.
        """
        # Any lines that aren't headers, but are above the first row to
        # contain actual data will be stored as comment lines
        if row_count < start_row:
            comment_lines.append(row)
            return row_collection, comment_lines

        comment_lines.append(None)

        # NOTE(review): no 'comment' data object is added to the collection
        # built by this loader - confirm _addValue accepts this key.
        if '!' in row[-1] or '#' in row[-1]:
            row_collection._addValue('comment', row[-1])

        # Add the row data in the order that it appears in the file
        # from left to right.
        for i in range(min(col_length, len(row))):
            row_collection._addValue(i, row[i])

        return row_collection, comment_lines

    # Default to the "not found" value used by _loadHeadData so that a file
    # with no header row (empty or comment only) does not leave these names
    # unbound when the DataFileSubfileMat is built.
    head1_loc = -1
    head2_loc = -1

    # Stores the comments found in the file. _loadHeadData appends to this
    # list through the enclosing scope.
    comment_lines = []

    try:
        logger.info('Loading data file contents from disc - %s' % (path))
        with open(path, 'rb') as csv_file:
            csv_file = csv.reader(csv_file)

            # Do a quick check of the file setup
            cols, col_length, head_list, start_row = _scanfile(path)

            # First entry doesn't want to have a comma in front when
            # formatting, but all of the others do.
            row_collection = RowDataCollection()
            row_collection.addToCollection(
                do.FloatData(0, format_str=' {0}', default='', no_of_dps=6))
            for i in range(1, len(cols)):
                if cols[i] == True:
                    row_collection.addToCollection(
                        do.FloatData(i, format_str=', {0}', default='',
                                     no_of_dps=6))
                else:
                    row_collection.addToCollection(
                        do.StringData(i, format_str=', {0}', default=''))

            row_collection.addToCollection(
                do.StringData('actual_header', format_str='{0}', default=''),
                index=0)
            row_collection.addToCollection(
                do.IntData('row_no', format_str=None, default=''))

            first_data_line = False
            # Loop through the contents list loaded from file line-by-line.
            for i, line in enumerate(csv_file, 0):

                comment = hasCommentOnlyLine(''.join(line), comment_types)
                if comment or comment == '':
                    comment_lines.append([comment, i])

                # If we have a line that isn't a comment or a blank then it
                # is going to contain materials entries.
                else:
                    # First non-comment is the headers
                    if first_data_line == False:
                        first_data_line = True
                        row_collection, head1_loc, head2_loc = _loadHeadData(
                            line, row_collection, col_length)
                    else:
                        row_collection, comment_lines = _loadRowData(
                            line, i, row_collection, comment_lines,
                            col_length, start_row)
                        row_collection._addValue('row_no', i)

    except IOError:
        logger.warning('Cannot load file - IOError')
        raise IOError('Cannot load file at: ' + path)

    path_holder = filetools.PathHolder(path, root)
    mat_sub = dataobj.DataFileSubfileMat(path_holder, row_collection,
                                         comment_lines, path_holder.filename,
                                         head1_loc, head2_loc)
    return mat_sub
def readMatCsvFile(datafile, args_dict={}):
    """Loads the contents of the Materials CSV file referenced by datafile.

    Loads the data from the file referenced by the given TuflowFile object
    into a :class:'rowdatacollection' and a list of comment only lines. Any
    subfiles referenced in the Manning's column are recorded so that they
    can be loaded separately.

    Args:
        datafile(TuflowFile): TuflowFile object with file details.
        args_dict(dict): not used by this function.

    Return:
        tuple: rowdatacollection, comment_lines(list), subfile_details(dict).
            subfile_details maps each subfile name found in the Manning's
            column to the list of (zero, one or two) column header names
            given with it.

    Raises:
        IOError: if the file cannot be loaded.
        IndexError: if the file is not formatted as a Materials csv file.
        AttributeError: if the file is not formatted as a Materials csv file.

    See Also:
        :class:'rowdatacollection'.
    """
    comment_types = ['#', '!']
    csv_enum = dataobj.MatCsvEnum()
    subfile_details = {}

    def _loadHeadData(row, row_collection):
        """Loads the column header data.

        The four file headers are placed at positions 0, 1, 9 and 11 of a
        twelve entry list (other positions are None) and every entry is
        added to the 'actual_header' data object.

        Args:
            row(list): containing the header row data.
            row_collection(rowdatacollection): for updating.

        Return:
            rowdatacollection: updated with header row details.
        """
        new_row = [None] * 12

        if '!' in row[-1] or '#' in row[-1]:
            row_collection._addValue('comment', row[-1])

        new_row[0] = row[0]
        new_row[1] = row[1]
        new_row[9] = row[2]
        new_row[11] = row[3]

        for header in new_row:
            row_collection._addValue('actual_header', header)

        return row_collection

    def _disectEntry(col_no, entry, new_row):
        """Breaks the row values into the appropriate object values.

        The materials file can have Excel style sub-values. i.e. it can have
        separate columns defined within a bigger one. This function will
        break those values down into a format usable by the values
        initiated in the rowdatacollection.

        Args:
            col_no(int): the current column number.
            entry(string): the value of the current column.
            new_row(list): the row values to update.

        Return:
            list containing the updated row values.

        Note:
            This isn't very nice. Need to clean it up and find a better,
            safer way of dealing with breaking the row data up. It may be
            excess work but perhaps creating an xml converter could work
            quite well and make dealing with the file a bit easier?
        """
        # Put in ID and Hazard as normal
        if col_no == 0:
            new_row[0] = entry
        elif col_no == 11:
            new_row[11] = entry

        # Possible break up Manning's entry further
        elif col_no == 1:
            # See if there's more than one value in the Manning's category.
            splitval = entry.split(',')

            if len(splitval) == 1:
                # A single numeric value is the value for 'n'
                if uuf.isNumeric(splitval[0]):
                    new_row[1] = splitval[0]

                # Otherwise it's a filename. These can be further separated
                # into two column headers to read from the sub files.
                else:
                    parts = [p.strip() for p in splitval[0].split('|')]
                    subfile_name = parts[0]

                    # The dictionary key is stripped in every case so that
                    # it always matches the filename stored in column 6.
                    subfile_details[subfile_name] = parts[1:3]
                    new_row[6] = subfile_name
                    if len(parts) > 1:
                        new_row[7] = parts[1]
                    if len(parts) > 2:
                        new_row[8] = parts[2]

            # If there's more than one value then it must be the Manning's
            # depth curve values (N1, Y1, N2, Y2). Fewer than four values
            # raises an IndexError, which the caller reports as a badly
            # formatted file.
            else:
                new_row[2] = splitval[0]
                new_row[3] = splitval[1]
                new_row[4] = splitval[2]
                new_row[5] = splitval[3]

        # Finally grab the infiltration parameters (IL, CL)
        elif col_no == 2:
            splitval = entry.split(',')
            new_row[9] = splitval[0]
            new_row[10] = splitval[1]

        return new_row

    def _loadRowData(row, row_count, row_collection):
        """Loads the data in a specific row of the file.

        Args:
            row(list): containing the row data.
            row_count(int): the current row number (not used).
            row_collection(rowdatacollection): for updating.
        """
        if '!' in row[-1] or '#' in row[-1]:
            row_collection._addValue('comment', row[-1])

        new_row = [None] * 12

        # Add the row data in the order that it appears in the file
        # from left to right.
        for i in csv_enum.ITERABLE:
            if i < len(row):
                new_row = _disectEntry(i, row[i], new_row)

        for val, item in enumerate(new_row):
            row_collection._addValue(val, item)

    # One flag per column for keys 1 to 11: 0 builds a StringData object,
    # anything else builds a FloatData object.
    types = [1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0]

    # First entry doesn't want to have a comma in front when formatting, so
    # do it separately because it has a different format string.
    row_collection = RowDataCollection()
    row_collection.addToCollection(
        do.StringData(0, format_str='{0}', default=''))
    for i, t in enumerate(types, 1):
        if t == 0:
            row_collection.addToCollection(
                do.StringData(i, format_str=', {0}', default=''))
        else:
            row_collection.addToCollection(
                do.FloatData(i, format_str=', {0}', default='', no_of_dps=3))

    # Add a couple of extra rows to the row_collection for tracking the
    # data in the file.
    row_collection.addToCollection(
        do.StringData('comment', format_str='{0}', default=''))
    row_collection.addToCollection(
        do.StringData('actual_header', format_str='{0}', default=''))
    row_collection.addToCollection(
        do.IntData('row_no', format_str=None, default=''))

    path = datafile.absolutePath()
    try:
        logger.info('Loading data file contents from disc - %s' % (path))
        # NOTE(review): binary mode with csv.reader is the Python 2
        # convention - confirm before running under Python 3.
        with open(path, 'rb') as csv_file:
            csv_file = csv.reader(csv_file)

            # Stores the comments found in the file
            comment_lines = []
            first_data_line = False
            line_count = 0

            try:
                # Loop through the contents list loaded from file
                # line-by-line.
                for i, line in enumerate(csv_file, 0):

                    comment = hasCommentOnlyLine(''.join(line), comment_types)
                    if comment or comment == '':
                        comment_lines.append(comment)

                    # If we have a line that isn't a comment or a blank then
                    # it is going to contain materials entries.
                    else:
                        # First non-comment is the headers
                        if first_data_line == False:
                            first_data_line = True
                            _loadHeadData(line, row_collection)
                        else:
                            _loadRowData(line, i, row_collection)
                            row_collection._addValue('row_no', line_count)
                            line_count += 1

                        # None keeps comment_lines in step with the file.
                        comment_lines.append(None)

            except IndexError:
                logger.error(
                    'This file is not setup/formatted correctly for a Materials.CSV file:\n' + path)
                raise IndexError(
                    'File is not correctly formatted for a Materials.csv file')
            except AttributeError:
                logger.error(
                    'This file is not setup/formatted correctly for a Materials.CSV file:\n' + path)
                raise AttributeError(
                    'File is not correctly formatted for a Materials.csv file')

    except IOError:
        logger.warning('Cannot load file - IOError')
        raise IOError('Cannot load file at: ' + path)

    # Just need to reset the has_changed variable because it will have been
    # set to True while loading everything in.
    for i in range(0, len(csv_enum.ITERABLE)):
        row_collection.getDataObject(i).has_changed = False

    return row_collection, comment_lines, subfile_details
def readBcFile(datafile, args_dict={}):
    """Loads the contents of the BC Database file referenced by datafile.

    Loads the data from the file referenced by the given TuflowFile object
    into a :class:'rowdatacollection' and a list of comment only lines.

    Args:
        datafile(TuflowFile): TuflowFile object with file details.
        args_dict(dict): not used by this function.

    Return:
        tuple: rowdatacollection, comment_lines(list).

    Raises:
        IOError: if the file cannot be loaded.

    See Also:
        :class:'rowdatacollection'.
    """
    comment_types = ['#', '!']
    bc_enum = dataobj.BcEnum()

    def _checkHeaders(row, required_headers):
        """Checks that any required headers can be found.

        Reviews the headers in the header row of the csv file to ensure that
        any specifically needed named column headers exist. A warning is
        logged for each required header that is missing.

        Args:
            row(list): columns headers.
            required_headers(list): column names that must be included.

        Return:
            bool: True if all required headers were found, False otherwise.
        """
        missing = [h for h in required_headers if h not in row]
        for header in missing:
            logger.warning('Required header (' + header +
                           ') not found in file: ' + path)
        return not missing

    def _loadHeadData(row, row_collection, required_headers):
        """Loads the column header data.

        Adds the file defined names for the headers to the
        rowdatacollection.

        Args:
            row(list): containing the row data.
            row_collection(rowdatacollection): for updating.
            required_headers(list): column names that must exist.

        Return:
            rowdatacollection: updated with header row details.
        """
        row_length = len(row)
        # Missing headers are only logged; loading carries on regardless.
        _checkHeaders(row, required_headers)
        for i, v in enumerate(bc_enum.ITERABLE):
            if i < row_length:
                row_collection._addValue('actual_header', row[i])

        return row_collection

    def _loadRowData(row, row_count, row_collection):
        """Loads the data in a specific row of the file.

        Args:
            row(list): containing the row data.
            row_count(int): the current row number (not used).
            row_collection(rowdatacollection): for updating.

        Return:
            rowdatacollection: updated with the row details.
        """
        if '!' in row[-1] or '#' in row[-1]:
            row_collection._addValue('comment', row[-1])

        # Add the row data in the order that it appears in the file
        # from left to right.
        for i in bc_enum.ITERABLE:
            if i < len(row):
                row_collection._addValue(i, row[i])

        return row_collection

    # Initialise the RowDataCollection object with the correct setup. The
    # first entry doesn't have a comma in front when formatting.
    row_collection = RowDataCollection()
    for i, val in enumerate(bc_enum.ITERABLE):
        if i == 0:
            row_collection.addToCollection(
                do.StringData(i, format_str='{0}', default=''))
        else:
            row_collection.addToCollection(
                do.StringData(i, format_str=', {0}', default=''))

    row_collection.addToCollection(
        do.StringData('actual_header', format_str=', {0}', default=''),
        index=0)
    row_collection.addToCollection(
        do.IntData('row_no', format_str=None, default=''))

    # path is also read by _checkHeaders through the enclosing scope.
    path = datafile.absolutePath()
    required_headers = ['Name', 'Source']
    try:
        logger.info('Loading data file contents from disc - %s' % (path))
        with open(path, 'rU') as csv_file:
            csv_file = csv.reader(csv_file)

            # Stores the comments found in the file
            comment_lines = []
            first_data_line = False
            row_count = 0
            # Loop through the contents list loaded from file line-by-line.
            for i, line in enumerate(csv_file, 0):

                comment = hasCommentOnlyLine(''.join(line), comment_types)
                if comment or comment == '':
                    comment_lines.append(comment)

                # If we have a line that isn't a comment or a blank then it
                # is going to contain data entries.
                else:
                    # First non-comment is the headers
                    if first_data_line == False:
                        first_data_line = True
                        row_collection = _loadHeadData(line, row_collection,
                                                       required_headers)
                    else:
                        row_collection = _loadRowData(line, i, row_collection)
                        row_collection._addValue('row_no', row_count)
                        row_count += 1

                    # None keeps comment_lines in step with the file.
                    comment_lines.append(None)

    except IOError:
        logger.warning('Cannot load file - IOError')
        raise IOError('Cannot load file at: ' + path)

    # Just need to reset the has_changed variable because it will have been
    # set to True while loading everything in.
    # NOTE(review): the other loaders in this file call getDataObject()
    # here rather than dataObject() - confirm which name RowDataCollection
    # actually provides.
    for i in range(0, len(bc_enum.ITERABLE)):
        row_collection.dataObject(i).has_changed = False

    return row_collection, comment_lines