Пример #1
0
def readMatCsvFile(datafile):
    """Loads the contents of the Materials CSV file referenced by datafile.

    Loads the data from the file referenced by the given TuflowFile object into
    a :class:'rowdatacollection', a list of comment only lines and a dict of
    the sub-files referenced from the Manning's column.

    Args:
        datafile(TuflowFile): TuflowFile object with file details.

    Return:
        tuple: rowdatacollection, comment_lines(list), subfile_details(dict).
            comment_lines has one entry per line read from the file: the value
            returned by hasCommentOnlyLine for comment/blank lines and None
            for header/data lines. subfile_details maps each sub-file name
            found in the Manning's column to the list of (up to two) column
            headers given after it with the '|' separator.

    Raises:
        IOError: if the file cannot be opened or read.
        IndexError: if a row doesn't contain the values expected.
        AttributeError: if a row value isn't of the expected type.

    See Also:
        :class:'rowdatacollection'.
    """
    comment_types = ['#', '!']
    csv_enum = dataobj.MatCsvEnum()
    subfile_details = {}

    def _loadHeadData(row, row_collection):
        """Loads the column header row into the 'actual_header' data object.

        Only the first four columns of the header row are read. They are
        placed at the positions used for the ID, Manning's n, first
        infiltration value and hazard in the twelve value row layout; all of
        the other positions are stored as None.

        Args:
            row(list): containing the header row data.
            row_collection(rowdatacollection): for updating.

        Return:
            rowdatacollection: updated with header row details.
        """
        new_row = [None] * 12

        if '!' in row[-1] or '#' in row[-1]:
            row_collection.addValue('comment', row[-1])

        new_row[0] = row[0]
        new_row[1] = row[1]
        new_row[9] = row[2]
        new_row[11] = row[3]

        for header in new_row:
            row_collection.addValue('actual_header', header)

        return row_collection

    def _disectEntry(col_no, entry, new_row):
        """Breaks the row values into the appropriate object values.

        The materials file can have Excel style sub-values. i.e. it can have
        separate columns defined within a bigger one. This function will break
        those values down into a format usable by the values initiated in the
        rowdatacollection.

        Layout of new_row as written by this function:
            0: ID, 1: single Manning's n, 2-5: depth curve (N1, Y1, N2, Y2),
            6: sub-file name, 7-8: sub-file column headers,
            9-10: infiltration parameters (IL, CL), 11: hazard.

        Args:
            col_no(int): the current column number.
            entry(string): the value of the current column.
            new_row(list): the row values to update.

        Return:
            list containing the updated row values.

        Note:
            This isn't very nice. Need to clean it up and find a better, safer
            way of dealing with breaking the row data up. It may be excess work
            but perhaps creating an xml converter could work quite well and
            make dealing with the file a bit easier?
        """
        # Put in ID and Hazard as normal
        if col_no == 0:
            new_row[0] = entry
        elif col_no == 11:
            new_row[11] = entry

        # Possibly break up the Manning's entry further
        elif col_no == 1:
            # See if there's more than one value in the Manning's category.
            splitval = entry.split(',')

            if len(splitval) == 1:
                # A single numeric value is the Manning's 'n'
                if uuf.isNumeric(splitval[0]):
                    new_row[1] = splitval[0]

                # Otherwise it's a filename, optionally followed by up to two
                # '|' separated column headers to read from the sub file.
                else:
                    strsplit = splitval[0].split('|')
                    filename = strsplit[0].strip()
                    columns = [s.strip() for s in strsplit[1:3]]

                    # The key is always stripped so that it matches the
                    # filename stored in the row itself.
                    subfile_details[filename] = columns
                    new_row[6] = filename
                    for offset, column in enumerate(columns):
                        new_row[7 + offset] = column

            # If there's more than one value then it must be the Manning's
            # depth curve values (N1, Y1, N2, Y2). Fewer than four values
            # raises an IndexError which is handled by the calling loop.
            else:
                new_row[2] = splitval[0]
                new_row[3] = splitval[1]
                new_row[4] = splitval[2]
                new_row[5] = splitval[3]

        # Finally grab the infiltration parameters (IL, CL)
        # NOTE(review): a column with fewer than two values raises an
        # IndexError here - confirm that both values are always required.
        elif col_no == 2:
            splitval = entry.split(',')
            new_row[9] = splitval[0]
            new_row[10] = splitval[1]

        return new_row

    def _loadRowData(row, row_count, row_collection):
        """Loads the data in a specific row of the file.

        Args:
            row(list): containing the row data.
            row_count(int): the current row number (not currently used).
            row_collection(rowdatacollection): for updating.
        """
        if '!' in row[-1] or '#' in row[-1]:
            row_collection.addValue('comment', row[-1])
        new_row = [None] * 12

        # Add the row data in the order that it appears in the file
        # from left to right.
        for i in csv_enum.ITERABLE:
            if i < len(row):
                new_row = _disectEntry(i, row[i], new_row)

        for key, item in enumerate(new_row):
            row_collection.addValue(key, item)

    row_collection = RowDataCollection()
    # 1 == float value, 0 == string value for data objects 1 to 11.
    types = [1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0]

    # Do the first entry separately because it has a different format string
    # (it doesn't want to have a comma in front when formatting).
    row_collection.initCollection(
        do.StringData(0, 0, format_str='{0}', default=''))
    for i, t in enumerate(types, 1):
        if t == 0:
            row_collection.initCollection(
                do.StringData(i, i, format_str=', {0}', default=''))
        else:
            row_collection.initCollection(
                do.FloatData(i, i, format_str=', {0}', default='',
                             no_of_dps=3))

    # Add a couple of extra rows to the row_collection for tracking the
    # data in the file.
    row_collection.initCollection(
        do.StringData(12, 'comment', format_str='{0}', default=''))
    row_collection.initCollection(
        do.StringData(13, 'actual_header', format_str='{0}', default=''))
    row_collection.initCollection(
        do.IntData(15, 'row_no', format_str=None, default=''))

    path = datafile.getAbsolutePath()
    try:
        logger.info('Loading data file contents from disc - %s' % (path))
        # NOTE(review): binary mode is what the Python 2 csv module expects;
        # under Python 3 csv.reader needs a text mode file - confirm the
        # Python version being targeted before changing this.
        with open(path, 'rb') as csv_file:
            reader = csv.reader(csv_file)

            # Stores the comments found in the file
            comment_lines = []
            header_loaded = False
            line_count = 0

            try:
                # Loop through the contents list loaded from file line-by-line.
                for i, line in enumerate(reader, 0):

                    comment = hasCommentOnlyLine(''.join(line), comment_types)
                    if comment or comment == '':
                        comment_lines.append(comment)

                    # If we have a line that isn't a comment or a blank then
                    # it is going to contain materials entries.
                    else:
                        # First non-comment is the headers
                        if not header_loaded:
                            header_loaded = True
                            _loadHeadData(line, row_collection)
                        else:
                            _loadRowData(line, i, row_collection)

                        # The header row is given a row_no as well.
                        row_collection.addValue('row_no', line_count)
                        line_count += 1
                        comment_lines.append(None)
            except IndexError:
                logger.error(
                    'This file is not setup/formatted correctly for a Materials.CSV file:\n'
                    + path)
                raise IndexError(
                    'File is not correctly formatted for a Materials.csv file')
            except AttributeError:
                logger.error(
                    'This file is not setup/formatted correctly for a Materials.CSV file:\n'
                    + path)
                raise AttributeError(
                    'File is not correctly formatted for a Materials.csv file')

    except IOError:
        logger.warning('Cannot load file - IOError')
        raise IOError('Cannot load file at: ' + path)

    # Just need to reset the has_changed variable because it will have been
    # set to True while loading everything in.
    for i in range(len(csv_enum.ITERABLE)):
        row_collection.getDataObject(i).has_changed = False

    return row_collection, comment_lines, subfile_details
Пример #2
0
def readTmfFile(datafile):
    """Loads the contents of the TMF file referenced by datafile.

    Reads the file referenced by the given TuflowFile object line-by-line.
    Comment only and blank lines are collected in a list; every other line is
    handed to the module level _loadRowData function to be added to a
    :class:'rowdatacollection'.

    Args:
        datafile(TuflowFile): TuflowFile object with file details.

    Return:
        tuple: rowdatacollection, comment_lines(list). comment_lines has one
            entry per line in the file: the value returned by
            hasCommentOnlyLine for comment/blank lines or None for data lines.

    See Also:
        :class:'rowdatacollection'.
    """
    separator = ','
    comment_markers = ['#', '!']
    enum = dataobj.TmfEnum()

    path = datafile.getAbsolutePath()
    # Keys of the data objects that hold the actual file values.
    tracked_keys = range(11)

    # Data object 0 is an integer, 1 to 10 are floats written with a leading
    # comma.
    collection = RowDataCollection()
    collection.initCollection(
        do.IntData(0, 0, format_str=None, default=''))
    for col in range(1, 11):
        collection.initCollection(
            do.FloatData(col, col, format_str=', {0}', default='',
                         no_of_dps=3))

    # Extra data objects for tracking comments and row numbers.
    collection.initCollection(
        do.StringData(11, 'comment', format_str=' ! {0}', default=''))
    collection.initCollection(
        do.IntData(12, 'row_no', format_str=None, default=''))

    logger.info('Loading data file contents from disc - %s' % (path))
    file_lines = _loadFileFromDisc(path)

    comment_lines = []
    data_rows = 0
    for line in file_lines:
        comment = hasCommentOnlyLine(line, comment_markers)

        if not comment and comment != '':
            # Neither a comment nor a blank line, so it holds row values.
            comment_lines.append(None)
            collection = _loadRowData(line, data_rows, collection,
                                      enum.ITERABLE, comment_markers,
                                      separator)
            data_rows += 1
        else:
            comment_lines.append(comment)

    # Loading the values will have flagged the data objects as changed, so
    # put the flags back to their unchanged state.
    for key in tracked_keys:
        collection.getDataObject(key).has_changed = False

    return collection, comment_lines
Пример #3
0
def readBcFile(datafile):
    """Loads the contents of the BC Database file referenced by datafile.

    Loads the data from the file referenced by the given TuflowFile object into
    a :class:'rowdatacollection' and a list of comment only lines.

    Args:
        datafile(TuflowFile): TuflowFile object with file details.

    Return:
        tuple: rowdatacollection, comment_lines(list). comment_lines has one
            entry per line read from the file: the value returned by
            hasCommentOnlyLine for comment/blank lines and None for
            header/data lines.

    Raises:
        IOError: if the file cannot be opened or read.

    See Also:
        :class:'rowdatacollection'.
    """
    comment_types = ['#', '!']
    bc_enum = dataobj.BcEnum()

    def _checkHeaders(row, required_headers):
        """Checks that any required headers can be found.

        Reviews the headers in the header row of the csv file to ensure that
        any specifically needed named column headers exist. A warning is
        logged for every required header that is missing.

        Args:
            row(list): columns headers.
            required_headers(list): column names that must be included.

        Return:
            bool: True if all required headers were found, False otherwise.
        """
        missing = [r for r in required_headers if r not in row]
        for header in missing:
            # path is set in the enclosing function before this is called.
            logger.warning('Required header (' + header + ') not ' +
                           'found in file: ' + path)
        return not missing

    def _loadHeadData(row, row_collection, required_headers):
        """Loads the column header data.

        Adds the file defined names for the headers to the rowdatacollection.

        Args:
            row(list): containing the row data.
            row_collection(rowdatacollection): for updating.
            required_headers(list): column names that must exist.

        Return:
            rowdatacollection: updated with header row details.
        """
        # Missing headers are only reported, they don't stop the load.
        _checkHeaders(row, required_headers)

        # Don't store more headers than there are data objects.
        for header in row[:len(bc_enum.ITERABLE)]:
            row_collection.addValue('actual_header', header)

        return row_collection

    def _loadRowData(row, row_count, row_collection):
        """Loads the data in a specific row of the file.

        Args:
            row(list): containing the row data.
            row_count(int): the current row number (not currently used).
            row_collection(rowdatacollection): for updating.

        Return:
            rowdatacollection: updated with the row values.
        """
        # NOTE(review): no 'comment' data object is initialised in this
        # function - confirm that RowDataCollection.addValue copes with that.
        if '!' in row[-1] or '#' in row[-1]:
            row_collection.addValue('comment', row[-1])

        # Add the row data in the order that it appears in the file
        # from left to right.
        for i in bc_enum.ITERABLE:
            if i < len(row):
                row_collection.addValue(i, row[i])

        return row_collection

    # Initialise the RowDataCollection object with the correct setup. The
    # first entry has no leading comma in its format string.
    row_collection = RowDataCollection()
    for i, val in enumerate(bc_enum.ITERABLE):
        if i == 0:
            row_collection.initCollection(
                do.StringData(i, i, format_str='{0}', default=''))
        else:
            row_collection.initCollection(
                do.StringData(i, i, format_str=', {0}', default=''))

    row_collection.initCollection(
        do.StringData(0, 'actual_header', format_str=', {0}', default=''))
    row_collection.initCollection(
        do.IntData(15, 'row_no', format_str=None, default=''))

    path = datafile.getAbsolutePath()
    required_headers = ['Name', 'Source']
    try:
        logger.info('Loading data file contents from disc - %s' % (path))
        # NOTE(review): binary mode is what the Python 2 csv module expects;
        # under Python 3 csv.reader needs a text mode file - confirm the
        # Python version being targeted before changing this.
        with open(path, 'rb') as csv_file:
            reader = csv.reader(csv_file)

            # Stores the comments found in the file
            comment_lines = []
            header_loaded = False
            row_count = 0
            # Loop through the contents list loaded from file line-by-line.
            for i, line in enumerate(reader, 0):

                comment = hasCommentOnlyLine(''.join(line), comment_types)
                if comment or comment == '':
                    comment_lines.append(comment)

                # If we have a line that isn't a comment or a blank then it
                # is going to contain the header or a data entry.
                else:
                    # First non-comment is the headers
                    if not header_loaded:
                        header_loaded = True
                        row_collection = _loadHeadData(line, row_collection,
                                                       required_headers)
                    else:
                        row_collection = _loadRowData(line, i, row_collection)
                        # Only data rows are given a row_no.
                        row_collection.addValue('row_no', row_count)
                        row_count += 1

                    comment_lines.append(None)

    except IOError:
        logger.warning('Cannot load file - IOError')
        raise IOError('Cannot load file at: ' + path)

    # Just need to reset the has_changed variable because it will have been
    # set to True while loading everything in.
    for i in range(len(bc_enum.ITERABLE)):
        row_collection.getDataObject(i).has_changed = False

    return row_collection, comment_lines