def test_columns_number(self):
    """parse_columns_number should report four columns for four #COLUMNINFO headers."""
    header = (
        r"#COLUMNINFO = 1, m, Diepte bovenkant laag, 1 "
        r"#COLUMNINFO = 2, m, Diepte onderkant laag, 2"
        r"#COLUMNINFO = 3, mm, Zandmediaan, 8"
        r"#COLUMNINFO = 4, mm, Grindmediaan, 9"
    )
    parsed = utils.parse_columns_number(header)
    self.assertEqual(parsed, 4)
def __init__(self, header_s, data_s):
    """
    Parser of the borehole file.

    :param header_s: (str) Header of the file
    :param data_s: (str) Data of the file
    """
    self.type = 'bore'
    self.project_id = utils.parse_project_type(header_s, 'bore')

    # Header metadata needed to split the data section into rows/columns.
    n_columns = utils.parse_columns_number(header_s)
    col_sep = utils.parse_column_separator(header_s)
    rec_sep = utils.parse_record_separator(header_s)
    data_rows = data_s.split(rec_sep)
    soil_rows = self.extract_soil_info(data_rows, n_columns, col_sep)

    # Build the dataframe step by step instead of one long pipe chain.
    frame = self.parse_data_column_info(header_s, data_s, col_sep, n_columns)
    frame = frame.pipe(self.parse_data_soil_code, soil_rows)
    frame = frame.pipe(self.parse_data_soil_type, soil_rows)
    frame = frame.pipe(self.parse_add_info_as_string, soil_rows)
    frame = frame.join(self.data_soil_quantified(soil_rows))

    # Keep only the depth, soil-code and soil-fraction columns.
    self.df = frame[[
        'depth_top', 'depth_bottom', 'Soil_code', 'Gravel', 'Sand', 'Clay',
        'Loam', 'Peat', 'Silt'
    ]]
    # Rename the fractions to their single/double letter codes.
    self.df.columns = [
        'depth_top', 'depth_bottom', 'soil_code', 'G', 'S', 'C', 'L', 'P',
        'SI'
    ]
def __init__(self, path=None, string=None):
    """
    Parser of the borehole file.

    Parameters
    ----------
    path: str
        Path to the *.gef file.
    string: str
        String version of the *.gef file.
    """
    super().__init__(path=path, string=string)

    # Guard clauses: only GEF-BORE files are accepted here.
    if self.type == "borehole-report":
        raise ValueError(
            "The selected gef file is a GEF-BOREHOLE-Report. Can only parse "
            "GEF-CPT-Report and GEF-BORE-Report. Check the PROCEDURECODE.")
    if self.type != "bore":
        raise ValueError("The selected gef file is not a borehole. "
                         "Check the REPORTCODE or the PROCEDURECODE.")

    self.project_id = utils.parse_project_type(self._headers, "bore")
    self.nen_version = "NEN 5104"  # This is usually not correct for the boringen

    n_columns = utils.parse_columns_number(self._headers)
    col_sep = utils.parse_column_separator(self._headers)
    rec_sep = utils.parse_record_separator(self._headers)
    soil_rows = self.extract_soil_info(
        self._data.split(rec_sep), n_columns, col_sep)

    # Apply each parsing stage in order on top of the raw column data.
    frame = self.parse_data_column_info(
        self._headers, self._data, col_sep, n_columns)
    for stage in (
            self.parse_data_soil_code,
            self.parse_data_soil_type,
            self.parse_add_info_as_string,
            self.parse_soil_quantification,
    ):
        frame = frame.pipe(stage, soil_rows)
    self.df = frame

    # Drop the columns if they exist, do nothing if they don't
    for column in (
            "sand_median",
            "gravel_median",
            "lutum_percentage",
            "silt_percentage",
            "sand_percentage",
            "gravel_percentage",
            "organic_matter_percentage",
            "soil_type",
    ):
        if column in self.df.columns:
            self.df.drop_in_place(column)
def determine_column_names(headers, columns_number=None, columns_info=None):
    """
    Resolve the column names of a gef file.

    :param headers: (str) Header section of the file.
    :param columns_number: (int) Number of columns; parsed from the headers
        when both ``columns_number`` and ``columns_info`` are None.
    :param columns_info: (list) Pre-resolved column names; rebuilt from the
        headers whenever ``columns_number`` is known.
    :return: (list) Column names, or the ``columns_info`` argument unchanged
        when no column count could be determined.
    """
    if columns_number is None and columns_info is None:
        columns_number = utils.parse_columns_number(headers)
    if columns_number is not None:
        # Column numbers in gef headers are 1-based.
        columns_info = [
            utils.parse_column_info(
                headers, index, MAP_QUANTITY_NUMBER_COLUMN_NAME_CPT)
            for index in range(1, columns_number + 1)
        ]
    return columns_info
def __init__(self, header_s, data_s):
    """
    Parser of the borehole file.

    :param header_s: (str) Header of the file
    :param data_s: (str) Data of the file
    """
    self.type = "bore"
    self.project_id = utils.parse_project_type(header_s, "bore")
    # This is usually not correct for the boringen
    n_columns = utils.parse_columns_number(header_s)
    col_sep = utils.parse_column_separator(header_s)
    rec_sep = utils.parse_record_separator(header_s)
    soil_rows = self.extract_soil_info(
        data_s.split(rec_sep), n_columns, col_sep
    )

    # Run the parsing stages sequentially instead of one chained expression.
    frame = self.parse_data_column_info(
        header_s, data_s, col_sep, n_columns
    )
    for stage in (
        self.parse_data_soil_code,
        self.parse_data_soil_type,
        self.parse_add_info_as_string,
    ):
        frame = frame.pipe(stage, soil_rows)

    # Join the quantified soil fractions and keep the columns of interest.
    self.df = frame.join(self.data_soil_quantified(soil_rows))[
        [
            "depth_top",
            "depth_bottom",
            "Soil_code",
            "Gravel",
            "Sand",
            "Clay",
            "Loam",
            "Peat",
            "Silt",
            "remarks",
        ]
    ]
    # Rename soil fractions to their letter codes.
    self.df.columns = [
        "depth_top",
        "depth_bottom",
        "soil_code",
        "G",
        "S",
        "C",
        "L",
        "P",
        "SI",
        "Remarks",
    ]
def parse_data(header_s, data_s, columns_number=None, columns_info=None):
    """
    Parse the data section of a gef file into a DataFrame.

    :param header_s: (str) Header of the file, used to derive column names
        when neither ``columns_number`` nor ``columns_info`` is given.
    :param data_s: (str) Data section of the file.
    :param columns_number: (int) Number of columns; parsed from the header
        when both ``columns_number`` and ``columns_info`` are None.
    :param columns_info: (list) Column names; rebuilt from the header
        whenever ``columns_number`` is known.
    :return: (pandas.DataFrame) Parsed data.
    """
    if columns_number is None and columns_info is None:
        columns_number = utils.parse_columns_number(header_s)
    if columns_number is not None:
        columns_info = []
        for column_number in range(1, columns_number + 1):
            column_info = utils.parse_column_info(
                header_s, column_number, MAP_QUANTITY_NUMBER_COLUMN_NAME_CPT)
            columns_info.append(column_info)
    # '!' is an end-of-record marker in some gef files; strip it before
    # parsing. The separator regex accepts ';', whitespace, ',' or '|'.
    # NOTE: the previous dead `df = {}` initialization was removed; read_csv
    # runs unconditionally.
    return pd.read_csv(io.StringIO(data_s.replace('!', '')),
                       sep=r';|\s+|,|\|\s*',
                       names=columns_info,
                       index_col=False,
                       engine='python')
def parse_data(header_s, data_s, columns_number=None, columns_info=None):
    """
    Parse the data section of a gef file into a DataFrame.

    :param header_s: (str) Header of the file, used to derive column names
        and the value separator.
    :param data_s: (str) Data section of the file.
    :param columns_number: (int) Number of columns; parsed from the header
        when both ``columns_number`` and ``columns_info`` are None.
    :param columns_info: (list) Column names; rebuilt from the header
        whenever ``columns_number`` is known.
    :return: (pandas.DataFrame) Parsed data.
    """
    if columns_number is None and columns_info is None:
        columns_number = utils.parse_columns_number(header_s)
    if columns_number is not None:
        # Column numbers in gef headers are 1-based.
        columns_info = [
            utils.parse_column_info(
                header_s, index, MAP_QUANTITY_NUMBER_COLUMN_NAME_CPT
            )
            for index in range(1, columns_number + 1)
        ]
    # '!' is an end-of-record marker in some gef files; strip it first.
    cleaned = data_s.replace("!", "")
    separator = utils.find_separator(header_s)
    return pd.read_csv(
        io.StringIO(cleaned),
        sep=separator,
        names=columns_info,
        index_col=False,
        engine="python",
    )
def __init__(self, path=None, string=None):
    """
    Parser of the borehole file.

    :param path: (str) Path of the .gef file to parse.
    :param string: (str) String to parse.
    """
    self.path = path
    self.s = string
    self.zid = None  # ground level
    self.x = None
    self.y = None
    self.type = None
    self.end_depth_of_penetration_test = None
    self.project_id = None
    self.column_separator = None
    self.record_separator = None
    self.file_date = None
    # List of all the possible measurement variables
    if self.s is None:
        with open(path, encoding='utf-8', errors='ignore') as f:
            self.s = f.read()
    end_of_header = utils.parse_end_of_header(self.s)
    header_s, data_s = self.s.split(end_of_header)
    columns_number = utils.parse_columns_number(header_s)
    self.file_date = utils.parse_file_date(header_s)
    # Parse the gef type BEFORE the project id: the original code passed the
    # still-None self.type placeholder to parse_project_type.
    self.type = utils.parse_gef_type(header_s)
    self.project_id = utils.parse_project_type(header_s, self.type)
    self.x = utils.parse_xid_as_float(header_s)
    self.y = utils.parse_yid_as_float(header_s)
    self.zid = utils.parse_zid_as_float(header_s)
    column_separator = utils.parse_column_separator(header_s)
    record_separator = utils.parse_record_separator(header_s)
    data_s_rows = data_s.split(record_separator)
    data_rows_soil = self.extract_soil_info(data_s_rows, columns_number,
                                            column_separator)
    df_column_info = self.parse_data_column_info(header_s, data_s,
                                                 column_separator,
                                                 columns_number)
    df_soil_type = self.parse_data_soil_type(data_rows_soil)
    df_soil_code = self.parse_data_soil_code(data_rows_soil)
    df_soil_quantified = self.data_soil_quantified(data_rows_soil)
    df_additional_info = self.parse_add_info_as_string(data_rows_soil)
    df_bore_more_info = pd.concat([
        df_column_info, df_soil_code, df_soil_type, df_soil_quantified,
        df_additional_info
    ], axis=1, sort=False)
    self.df = df_bore_more_info[[
        'depth_top', 'depth_bottom', 'Soil_code', 'Gravel', 'Sand', 'Clay',
        'Loam', 'Peat', 'Silt'
    ]]
    # Silt is renamed to 'SI' (the original duplicated 'S', clashing with
    # the Sand column), consistent with the other bore parsers.
    self.df.columns = [
        'depth_top', 'depth_bottom', 'soil_code', 'G', 'S', 'C', 'L', 'P',
        'SI'
    ]