# Assumed imports for this snippet: `import pandas as pd` and the
# Converter class from the LAS parsing library in use.
def __init__(self, path, data=None, params=None, del_invalid=False,
             invalid_columns=None, invalid_value=None):
    # Note: invalid_columns and invalid_value are accepted but not used
    # in this variant of the constructor.
    c = Converter()
    log = c.set_file(path)
    params_l = log.parameter
    if params is not None:
        # Copy the requested parameters onto the instance and into a
        # one-row "heads" DataFrame under their new names.
        self.heads = {}
        for key, value in params.items():
            self.__dict__[value] = params_l[key]['value']
            self.heads[value] = self.__dict__[value]
        # print(self.heads)  # debug output
        self.heads = pd.DataFrame(self.heads, index=[0])
    if data is not None:
        # Copy the requested curves into a DataFrame under their new names.
        self.data = {}
        for key, value in data.items():
            self.__dict__[value] = log.data[key]
            self.data[value] = self.__dict__[value]
        self.data = pd.DataFrame(self.data)
        if del_invalid:
            # Drop rows containing invalid (NaN) values.
            self.data = self.data.dropna()
def __init__(self, path, data=None, params=None, del_invalid=False):
    """
    Constructor:
    path - path to the .las file
    data - curves to extract from the file,
        {"name of the curve in the file": "its new name in the dataframe"}
    params - parameters to extract from the file,
        {"name of the parameter in the file": "its new name in the dataframe"}
    del_invalid - whether to drop invalid values
    """
    c = Converter()
    self.log = c.set_file(path)
    params_l = self.log.parameter
    if params is not None:
        self.heads = {}
        for key, value in params.items():
            self.__dict__[value] = params_l[key]['value']
            self.heads[value] = self.__dict__[value]
        self.heads = pd.DataFrame(self.heads, index=[0])
    if data is not None:
        self.data = {}
        for key, value in data.items():
            self.__dict__[value] = self.log.data[key]
            self.data[value] = self.__dict__[value]
        self.data = pd.DataFrame(self.data)
        if del_invalid:
            self.data = self.data.dropna()
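A minimal usage sketch for the constructor above. The wrapper class name `LasLog`, the file path, and the curve/parameter mnemonics are illustrative assumptions, not names taken from the source.

# Hypothetical usage of the constructor above; `LasLog`, the path and the
# mnemonics are assumed for illustration only.
log = LasLog(
    "well_01.las",
    data={"dept": "depth", "gr": "gamma_ray"},   # curve -> dataframe column
    params={"STRT": "start_depth"},              # parameter -> heads column
    del_invalid=True,                            # drop rows with NaN values
)
print(log.heads)        # one-row DataFrame of the requested parameters
print(log.data.head())  # DataFrame of the requested curves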
def parse_las_data(las_data_doc):
    # Download the LAS file from S3 to a temporary location and parse it.
    s3.Object(target_bucket.name, las_data_doc).download_file('/tmp/las_processing.las')
    c = Converter()
    log = c.set_file('/tmp/las_processing.las')
    dict_from_las = log.get_dict()

    # Build a DataFrame from the curve data.
    data = dict_from_las['data']
    las_columns = {
        'depth': data['dept'],
        'caliper': data['cali'],
        'bulk_density': data['den'],
        'delta_t_compressional': data['dt'],
        'neutron_porosity_in_limestone_units': data['neu'],
        'resistivity_shallow': data['resslw'],
        'resistivity_deep': data['res_dep_ind'],
        'spontaneous_potential': data['sp'],
        'spontaneous_potential_corrected': data['spc'],
    }
    las_df = pd.DataFrame(las_columns)

    # Attach well-level metadata from the ~Well section to every row.
    las_df['latitude'] = dict_from_las['well']['LATI']['value']
    las_df['longitude'] = dict_from_las['well']['LONG']['value']
    las_df['geo_point'] = las_df['latitude'].astype(str) + "," + las_df['longitude'].astype(str)
    las_df['field_name'] = dict_from_las['well']['FLD']['value']
    las_df['country'] = dict_from_las['well']['CTRY']['value']
    las_df['operator'] = dict_from_las['well']['COMP']['value']
    las_df['wellname'] = dict_from_las['well']['WELL']['value']

    def frame2doc(dataframe):
        # Bulk-index every row of the DataFrame into Elasticsearch.
        global this_files_docs, total_docs
        this_files_docs = 0
        body = []
        for row in dataframe.index:
            body.append({'index': {'_index': args.index, '_type': '_doc'}})
            body.append(dataframe.loc[row].to_json())
            total_docs += 1
            this_files_docs += 1
        es.bulk(body=body)

    frame2doc(las_df)
    print('Indexed', str(this_files_docs), 'documents from LAS data file', str(las_data_doc))
    global total_files
    total_files += 1
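The function above depends on module-level globals (`s3`, `target_bucket`, `es`, `args`, `total_docs`, `total_files`). A sketch of how they might be initialized, assuming boto3 and elasticsearch-py; the bucket name, index name, host, and flag name are all placeholders.

# Hypothetical setup for the globals used by parse_las_data and frame2doc;
# bucket, index and host values are placeholders.
import argparse
import boto3
import pandas as pd
from elasticsearch import Elasticsearch

parser = argparse.ArgumentParser()
parser.add_argument("--index", default="las-data")  # assumed flag name
args = parser.parse_args()

s3 = boto3.resource("s3")
target_bucket = s3.Bucket("my-las-bucket")
es = Elasticsearch("http://localhost:9200")

total_docs = 0
total_files = 0
this_files_docs = 0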
# Assumed imports for this snippet: `import re`, `import pandas as pd` and the
# Converter class from the LAS parsing library in use.
def __init__(self, path, data=None, params=None, del_invalid=False):
    """
    Constructor:
    path - path to the .las file
    data - curves to extract from the file,
        {"name of the curve in the file": "its new name in the dataframe"}
    params - parameters to extract from the file,
        {"name of the parameter in the file": "its new name in the dataframe"}
    del_invalid - whether to drop invalid values
    """
    c = Converter()
    self.log = c.set_file(path)
    self.well = self.log.well
    params_l = self.log.parameter
    try:
        # Extract the well coordinates from the LOC field, which is expected
        # to contain two decimal numbers.
        loc = self.log.well["LOC"]["value"]
        self.well_X, self.well_Y = re.findall(r'\d+\.\d+', loc)
    except Exception as e:
        print(e)
        print('The LOC field is most likely missing, or its format is wrong. '
              'See the error above.')
    if params is not None:
        self.heads = {}
        for key, value in params.items():
            self.__dict__[value] = params_l[key]['value']
            self.heads[value] = self.__dict__[value]
        self.heads = pd.DataFrame(self.heads, index=[0])
    if data is not None:
        self.data = {}
        for key, value in data.items():
            self.__dict__[value] = self.log.data[key]
            self.data[value] = self.__dict__[value]
        self.data = pd.DataFrame(self.data)
        if del_invalid:
            self.data = self.data.dropna()
    else:
        # No explicit mapping given: load every curve under its original name.
        self.data = {}
        for key in self.log.data:
            self.__dict__[key] = self.log.data[key]
            self.data[key] = self.__dict__[key]
        self.data = pd.DataFrame(self.data)
        if del_invalid:
            self.data = self.data.dropna()
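A small standalone check of what the `re.findall` call above extracts from a LOC value; the location string here is made up.

import re

# Made-up LOC value containing two decimal numbers, the shape the
# constructor above expects.
loc = "LAT: 53.3498 LON: 6.2603"
well_x, well_y = re.findall(r'\d+\.\d+', loc)
print(well_x, well_y)  # -> 53.3498 6.2603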
def setUp(self):
    self.cv = Converter()
    self.log_input_file = self.cv.set_file("files/sample3.las")
class ConverterTest(TestCase):
    def setUp(self):
        self.cv = Converter()
        self.log_input_file = self.cv.set_file("files/sample3.las")

    def test_version(self):
        """LAS version is 2.0"""
        self.assertEqual(self.log_input_file.version, 2.0)

    def test_version_section(self):
        version_section = expected.version
        self.assertDictEqual(version_section, self.log_input_file.version_section)

    def test_well_section(self):
        well = expected.well
        self.assertDictEqual(well, self.log_input_file.well)

    def test_data_section(self):
        data = expected.data
        self.assertDictEqual(data, self.log_input_file.data)

    def test_parameter_section(self):
        parameter = expected.parameter
        self.assertDictEqual(parameter, self.log_input_file.parameter)

    def test_curve_section(self):
        curve = expected.curve
        self.assertDictEqual(curve, self.log_input_file.curve)

    def test_data_keys_equal_curve_keys(self):
        curve = self.log_input_file.curve.keys()
        data = self.log_input_file.data.keys()
        curve = {e.lower() for e in curve}
        self.assertEqual(curve, data)

    def test_data_keys_equal_curve_keys_sample_2(self):
        log_input_file = self.cv.set_file("files/sample2.las")
        curve = log_input_file.curve.keys()
        data = log_input_file.data.keys()
        curve = {e.lower() for e in curve}
        self.assertEqual(curve, data)

    def test_file_supported(self):
        with self.assertRaises(Exception) as E:
            self.cv.set_file("files/sample1.json")
        self.assertIn("File format no supported!", str(E.exception))

    def test_version_supported(self):
        with self.assertRaises(Exception) as E:
            self.cv.set_file("files/sample0.las")
        self.assertIn("Version not supported!", str(E.exception))

    def test_input_bytes_equal_input_file(self):
        log_input_bytes = self.cv.set_stream(expected.bytes_list)
        self.assertDictEqual(log_input_bytes.get_dict(), self.log_input_file.get_dict())

    def test_out_sample_1_not_equal_out_sample_3(self):
        log_sample_1 = self.cv.set_file("files/sample1.las").get_dict()
        log_sample_3 = self.cv.set_file("files/sample3.las").get_dict()
        self.assertNotEqual(log_sample_1, log_sample_3)
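A conventional entry point for running the test class above; it assumes the module also does `from unittest import TestCase`, imports `Converter`, and provides the `expected` fixture module it references.

import unittest

if __name__ == "__main__":
    # Run every test in this module, e.g. `python test_converter.py`
    # (the file name is an assumption).
    unittest.main(verbosity=2)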
# Assumed imports for this snippet: `import logging`, `import pandas as pd`,
# `from elasticsearch import helpers` and the LAS Converter class.
# `_is_float` is a small helper defined elsewhere in the module.
def parse_las_data(las_data_doc, index_name, es):
    c = Converter()
    # Read it. If something goes wrong, skip the file
    log = None
    try:
        log = c.set_file(las_data_doc)
    except Exception as ex:
        logging.warning(ex)
        return False

    meta_data = {}
    curve_data = {}
    data = {}

    # Programmatically get all metadata fields and store them nicely
    for meta_key, meta_value in log.get_dict()['well'].items():
        if meta_value is not None:
            meta_data[meta_value['desc'].replace(' ', '_').lower()] = meta_value['value']

    # If the expected latitude and longitude fields are not present - skip
    if 'surf._latitude' not in meta_data or 'surf._longitude' not in meta_data:
        logging.warning('Different latitude and longitude fields present... skipping')
        return False

    # If the latitude and longitude are not in the expected format - skip
    if _is_float(meta_data['surf._latitude']) is False or _is_float(meta_data['surf._longitude']) is False:
        logging.warning(
            "Different latitude or longitude format. Only supporting decimal format "
            "as that is what was provided in the sample... skipping")
        return False

    # Programmatically get all curve names and units and store them nicely
    for curve_key, curve_value in log.get_dict()['curve'].items():
        curve_data[curve_key.lower()] = {
            "name": curve_value['desc'].split(" ")[1].replace(' ', '_').lower(),
            "unit": curve_value['unit'],
        }
        # Get the actual curve data and store it in a dict for pandas to read
        data[curve_key.lower()] = log.get_dict()['data'][curve_key.lower()]

    # Read the curve data into pandas, which tidies a lot up automatically
    try:
        las_df = pd.DataFrame(data)
    except Exception as ex:
        logging.error(ex)
        return False

    all_data = []
    # Iterate over every row in the data
    for _, row in las_df.iterrows():
        # Get each row and drop any fields holding the LAS null value (-999.25)
        clean_row = {
            curve_data[key]['name']: val
            for key, val in row.items() if val != -999.2500
        }
        # Build up the Elasticsearch document
        all_data.append({
            "_index": index_name,
            "_type": "_doc",
            "_source": {
                "data": clean_row,
                "geo_point": {
                    "lat": meta_data['surf._latitude'],
                    "lon": meta_data['surf._longitude'],
                },
                **meta_data,
            },
        })

    # Upload the entire LAS file in one bulk request
    logging.info(f"Uploading {len(all_data)}")
    helpers.bulk(es, all_data, raise_on_exception=False, raise_on_error=False)
    logging.info(f'Indexed {len(all_data)} records from LAS data file {str(las_data_doc)}')
    return True
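A sketch of driving `parse_las_data` above, assuming elasticsearch-py; the host, index name, and file path are placeholders.

# Hypothetical driver for parse_las_data; host, index name and path
# are placeholders.
import logging
from elasticsearch import Elasticsearch

logging.basicConfig(level=logging.INFO)
es = Elasticsearch("http://localhost:9200")

if not parse_las_data("files/sample3.las", "las-curves", es):
    logging.warning("File was skipped")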