Пример #1
0
    def __init__(self,
                 path,
                 data=None,
                 params=None,
                 del_invalid=False,
                 invalid_columns=None,
                 invalid_value=None):
        """Load a file through ``Converter`` and expose selected fields.

        Args:
            path: Path handed to ``Converter.set_file``.
            data: Optional mapping ``{key in log.data: new name}``. Each
                selected entry becomes an instance attribute under its new
                name, and all of them are collected into the ``self.data``
                DataFrame. When ``None``, ``self.data`` is not set.
            params: Optional mapping ``{key in log.parameter: new name}``.
                The ``'value'`` field of each selected parameter becomes an
                instance attribute under its new name, and all of them are
                collected into the one-row ``self.heads`` DataFrame. When
                ``None``, ``self.heads`` is not set.
            del_invalid: When true (and ``data`` is given), rows of
                ``self.data`` that contain missing values are dropped.
            invalid_columns: Accepted but not used by this constructor.
            invalid_value: Accepted but not used by this constructor.
        """
        log = Converter().set_file(path)
        params_l = log.parameter

        # ``is not None`` rather than ``!= None``: the equality form is
        # evaluated element-wise by array-like arguments and then fails in
        # the ``if``.
        if params is not None:
            # Collect into a local dict first so that a target name such as
            # "heads" cannot clobber the container while it is being filled.
            heads = {name: params_l[key]['value']
                     for key, name in params.items()}
            self.__dict__.update(heads)
            # NOTE(review): looks like leftover debug output; kept so the
            # constructor's stdout is unchanged.
            print(heads)
            self.heads = pd.DataFrame(heads, index=[0])

        if data is not None:
            columns = {name: log.data[key] for key, name in data.items()}
            self.__dict__.update(columns)
            frame = pd.DataFrame(columns)
            self.data = frame.dropna() if del_invalid else frame
Пример #2
0
 def __init__(self, path, data=None, params=None, del_invalid=False):
     """Load a .las file and expose selected parameters and data columns.

     Args:
         path: Path to the .las file.
         data: Data to pull from the file, as
             ``{"name in the file": "new name for the dataframe"}``.
             Each selected entry also becomes an instance attribute under
             its new name. When ``None``, ``self.data`` is not set.
         params: Parameters to pull from the file, as
             ``{"name in the file": "new name for the dataframe"}``.
             The ``'value'`` of each one also becomes an instance attribute
             under its new name. When ``None``, ``self.heads`` is not set.
         del_invalid: Whether to drop rows with invalid (missing) values
             from ``self.data``.
     """
     self.log = Converter().set_file(path)
     params_l = self.log.parameter

     # ``is not None`` rather than ``!= None``: the equality form is
     # evaluated element-wise by array-like arguments and then fails in
     # the ``if``.
     if params is not None:
         # Build in a local dict so a target name like "heads" cannot
         # overwrite the container while it is still being filled.
         heads = {name: params_l[key]['value']
                  for key, name in params.items()}
         self.__dict__.update(heads)
         self.heads = pd.DataFrame(heads, index=[0])

     if data is not None:
         columns = {name: self.log.data[key] for key, name in data.items()}
         self.__dict__.update(columns)
         frame = pd.DataFrame(columns)
         self.data = frame.dropna() if del_invalid else frame
Пример #3
0
def parse_las_data(las_data_doc):
    """Download one LAS object, flatten it to rows and bulk-index them.

    The object named ``las_data_doc`` is fetched from ``target_bucket`` via
    the module-level ``s3`` resource into a fixed temporary path, read with
    ``Converter``, turned into a DataFrame with one row per sample and a
    set of well-header columns repeated on every row, and sent to the
    module-level ``es`` client with a single ``bulk`` call.

    Module-level state updated:
        this_files_docs: number of rows prepared from this file.
        total_docs: incremented by the same number.
        total_files: incremented by one after the file has been indexed.

    Args:
        las_data_doc: Key of the LAS object inside ``target_bucket``.
    """
    global this_files_docs, total_docs, total_files

    local_path = '/tmp/las_processing.las'
    s3.Object(target_bucket.name, las_data_doc).download_file(local_path)

    dict_from_las = Converter().set_file(local_path).get_dict()
    data = dict_from_las['data']
    well = dict_from_las['well']

    las_df = pd.DataFrame({
        'depth': data['dept'],
        'caliper': data['cali'],
        'bulk_density': data['den'],
        'delta_t_compressional': data['dt'],
        'neutron_porosity_in_limestone_units': data['neu'],
        'resistivity_shallow': data['resslw'],
        'resistivity_deep': data['res_dep_ind'],
        'spontaneous_potential': data['sp'],
        'spontaneous_potential_corrected': data['spc'],
    })

    # Well-header scalars are broadcast to every row.
    las_df['latitude'] = well['LATI']['value']
    las_df['longitude'] = well['LONG']['value']
    las_df['geo_point'] = (las_df['latitude'].astype(str) + ","
                           + las_df['longitude'].astype(str))
    las_df['field_name'] = well['FLD']['value']
    las_df['country'] = well['CTRY']['value']
    las_df['operator'] = well['COMP']['value']
    las_df['wellname'] = well['WELL']['value']

    # The bulk body alternates an action line with the document itself.
    body = []
    for row in las_df.index:
        body.append({'index': {'_index': args.index, '_type': '_doc'}})
        body.append(las_df.loc[row].to_json())

    this_files_docs = len(las_df.index)
    total_docs += this_files_docs

    # A bulk request with no actions is rejected by the server, so skip the
    # call when the file produced no rows.
    if body:
        es.bulk(body=body)

    print('Indexed', str(this_files_docs), 'documents from LAS data file',
          str(las_data_doc))

    total_files += 1
Пример #4
0
 def __init__(self, path, data=None, params=None, del_invalid=False):
     """Load a .las file and expose well info, parameters and data columns.

     Args:
         path: Path to the .las file.
         data: Data to pull from the file, as
             ``{"name in the file": "new name for the dataframe"}``.
             When ``None``, every entry of the log's data is taken under
             its original name. Each selected entry also becomes an
             instance attribute.
         params: Parameters to pull from the file, as
             ``{"name in the file": "new name for the dataframe"}``.
             The ``'value'`` of each one also becomes an instance attribute
             under its new name. When ``None``, ``self.heads`` is not set.
         del_invalid: Whether to drop rows with invalid (missing) values
             from ``self.data``.

     Attributes set:
         log, well: the converted log and its well section.
         well_X, well_Y: the two decimal numbers found in the well's
             ``LOC`` value; left unset when they cannot be extracted.
         heads: one-row DataFrame of the selected parameters (only when
             ``params`` is given).
         data: DataFrame of the selected (or all) data columns.
     """
     import traceback

     self.log = Converter().set_file(path)
     self.well = self.log.well
     params_l = self.log.parameter

     try:
         loc = self.log.well["LOC"]["value"]
         # Expects exactly two decimal numbers in the LOC string.
         self.well_X, self.well_Y = re.findall(r'\d+\.\d+', loc)
     except Exception:
         # Best effort: coordinates are optional. The message below refers
         # the reader to a stack trace, so actually emit one first.
         traceback.print_exc()
         print('Скорее всего отсутсвует поле loc, либо его формат неверен. Посмотрите stackTrace выше')

     # ``is not None`` rather than ``!= None``: the equality form is
     # evaluated element-wise by array-like arguments and then fails in
     # the ``if``.
     if params is not None:
         # Build in a local dict so a target name like "heads" cannot
         # overwrite the container while it is still being filled.
         heads = {name: params_l[key]['value']
                  for key, name in params.items()}
         self.__dict__.update(heads)
         self.heads = pd.DataFrame(heads, index=[0])

     if data is not None:
         columns = {name: self.log.data[key] for key, name in data.items()}
     else:
         # No selection given: take every data entry under its own name.
         columns = dict(self.log.data.items())
     self.__dict__.update(columns)

     frame = pd.DataFrame(columns)
     self.data = frame.dropna() if del_invalid else frame
 def setUp(self):
     """Create a converter and load ``files/sample3.las`` through it."""
     converter = Converter()
     self.cv = converter
     self.log_input_file = converter.set_file("files/sample3.las")
class ConverterTest(TestCase):
    """Checks ``Converter`` output against the fixtures in ``expected``."""

    def setUp(self):
        """Create a converter and load ``files/sample3.las`` through it."""
        self.cv = Converter()
        self.log_input_file = self.cv.set_file("files/sample3.las")

    def test_version(self):
        """LAS version is 2.0"""
        self.assertEqual(self.log_input_file.version, 2.0)

    def test_version_section(self):
        """Version section matches the expected fixture."""
        self.assertDictEqual(expected.version,
                             self.log_input_file.version_section)

    def test_well_section(self):
        """Well section matches the expected fixture."""
        self.assertDictEqual(expected.well, self.log_input_file.well)

    def test_data_section(self):
        """Data section matches the expected fixture."""
        self.assertDictEqual(expected.data, self.log_input_file.data)

    def test_parameter_section(self):
        """Parameter section matches the expected fixture."""
        self.assertDictEqual(expected.parameter,
                             self.log_input_file.parameter)

    def test_curve_section(self):
        """Curve section matches the expected fixture."""
        self.assertDictEqual(expected.curve, self.log_input_file.curve)

    def test_data_keys_equal_curve_keys(self):
        """Lower-cased curve names equal the data keys (sample 3)."""
        curve = {e.lower() for e in self.log_input_file.curve.keys()}
        data = self.log_input_file.data.keys()
        self.assertEqual(curve, data)

    def test_data_keys_equal_curve_keys_sample_2(self):
        """Lower-cased curve names equal the data keys (sample 2)."""
        log_input_file = self.cv.set_file("files/sample2.las")
        curve = {e.lower() for e in log_input_file.curve.keys()}
        data = log_input_file.data.keys()
        self.assertEqual(curve, data)

    def test_file_supported(self):
        """A non-LAS file is rejected with the expected message."""
        with self.assertRaises(Exception) as E:
            self.cv.set_file("files/sample1.json")
        # assertTrue("msg", exc) always passed: the non-empty string was the
        # value under test. Check the message against the exception instead.
        self.assertIn("File format no supported!", str(E.exception))

    def test_version_supported(self):
        """An unsupported LAS version is rejected with the expected message."""
        with self.assertRaises(Exception) as E:
            self.cv.set_file("files/sample0.las")
        # Same fix as in test_file_supported: really inspect the message.
        self.assertIn("Version not supported!", str(E.exception))

    def test_input_bytes_equal_input_file(self):
        """Loading from a byte stream yields the same dict as from a file."""
        log_input_bytes = self.cv.set_stream(expected.bytes_list)
        self.assertDictEqual(log_input_bytes.get_dict(),
                             self.log_input_file.get_dict())

    def test_out_sampel_1_not_equal_out_sample_3(self):
        """Different input files produce different dicts."""
        log_sample_1 = self.cv.set_file("files/sample1.las").get_dict()
        log_sample_3 = self.cv.set_file("files/sample3.las").get_dict()
        self.assertNotEqual(log_sample_1, log_sample_3)
Пример #7
0
def parse_las_data(las_data_doc, index_name, es):
    """Read one LAS file and bulk-index its rows into Elasticsearch.

    Every row of curve data becomes one document whose ``data`` field holds
    the row's values keyed by curve name, alongside a ``geo_point`` and all
    well-section metadata.

    Args:
        las_data_doc: Path of the LAS file handed to ``Converter.set_file``.
        index_name: Name of the target index.
        es: Client object passed to ``helpers.bulk``.

    Returns:
        True when the file was read and submitted for indexing; False when
        it was skipped (unreadable file, missing or non-decimal
        latitude/longitude, or curve data that cannot form a DataFrame).
    """
    c = Converter()

    # Read it. If something goes wrong, skip the file
    try:
        log = c.set_file(las_data_doc)
    except Exception as ex:
        # logging.warn is a deprecated alias; use logging.warning.
        logging.warning(ex)
        return False

    # Convert once instead of on every loop iteration.
    las = log.get_dict()

    # Programatically get all meta data fields and store them nicely,
    # keyed by their normalised description.
    meta_data = {
        entry['desc'].replace(' ', '_').lower(): entry['value']
        for entry in las['well'].values()
        if entry is not None
    }

    # If the expected latitude and longitude fields are not present - skip
    if 'surf._latitude' not in meta_data or 'surf._longitude' not in meta_data:
        logging.warning(
            'Different latitude and longitude fields present.. skipping')
        return False

    # If the latitude and longitude formats are not in the expected format - skip
    if _is_float(meta_data['surf._latitude']) is False or _is_float(
            meta_data['surf._longitude']) is False:
        logging.warning(
            "Different latitude or longitude format. Only supporting decimal format as that is what was provided in the sample... Skipping"
        )
        return False

    # Programatically get all curve data and names and store them nicely
    curve_data = {}
    data = {}
    for curve_key, curve_value in las['curve'].items():
        column = curve_key.lower()
        curve_data[column] = {
            # NOTE(review): assumes the description has a double-space
            # separated prefix before the name — confirm against the inputs.
            "name": curve_value['desc'].split("  ")[1].replace(' ',
                                                               '_').lower(),
            "unit": curve_value['unit']
        }
        # Get the actual curve data and store it in a dict for Pandas to read
        data[column] = las['data'][column]

    # Read the curve data into pandas which automagically tidies a lot up
    try:
        las_df = pd.DataFrame(data)
    except Exception as ex:
        logging.error(ex)
        return False

    all_data = []

    # Iterate over every row in the data
    for _, row in las_df.iterrows():
        # Drop any field holding the -999.25 null marker
        clean_row = {
            curve_data[key]['name']: val
            for key, val in row.items() if val != -999.2500
        }

        # Build up the Elasticsearch document
        all_data.append({
            "_index": index_name,
            "_type": "_doc",
            "_source": {
                "data": clean_row,
                "geo_point": {
                    "lat": meta_data['surf._latitude'],
                    "lon": meta_data['surf._longitude']
                },
                **meta_data
            }
        })

    # Upload the entire LAS file
    logging.info(f"Uploading {len(all_data)}")
    helpers.bulk(es, all_data, raise_on_exception=False, raise_on_error=False)

    logging.info(
        f'Indexed {len(all_data)} records from LAS data file {str(las_data_doc)}'
    )
    return True