#!./pyenv/bin/python
from pyquery import PyQuery as pq
import recordhelper as helper

def sanitize(cell):
    #Return the cell's text stripped of surrounding whitespace, or '' for empty cells
    if cell.text is None:
        return ""
    return cell.text.strip()

def process_rec(key, rec):
    #Split the table's cells into 13-column rows and emit each as a tab-separated record
    cells = pq(rec)("table")("td")
    records = [cells[x:x + 13] for x in xrange(0, len(cells), 13)]
    for record in records:
        helper.emit("\t".join([sanitize(cell) for cell in record]))

def parse_key(text):
    return text.split()[0]

helper.process_records(process_rec, parse_key, "__key")
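
#recordhelper is a local module shared by every script in this section and not
#shown here. The sketch below is only an inferred interface, based on how the
#scripts call it: the '__key' sentinel framing and the handling of error-string
#returns are assumptions, not the real implementation.
import sys

def emit(line):
    #Write one tab-separated record to stdout
    print line

def process_records(callback, parse_key, sentinel):
    #Read stdin, grouping lines into records delimited by sentinel lines
    #(assumed shape: '__key <raw key text>'); pass each (key, record) to callback.
    #Callbacks may return an error string, which is simply ignored here.
    key, buf = None, []
    for line in sys.stdin:
        if line.startswith(sentinel):
            if key is not None:
                callback(key, ''.join(buf))
            key, buf = parse_key(line[len(sentinel):].strip()), []
        else:
            buf.append(line)
    if key is not None:
        callback(key, ''.join(buf))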
#!./pyenv/bin/python
#LAS standard spec:
#https://esd.halliburton.com/support/LSM/GGT/ProMAXSuite/ProMAX/5000/5000_8/Help/promax/las_overview.pdf
import las
import recordhelper as helper

def process_record(filename, record):
    if '~' not in record:
        return 'No proper start of record'
    halves = record[record.index('~'):].strip().split('~A')
    if len(halves) < 2:
        return 'Improperly separated metadata & data blocks'
    metadata = las.parse_metadata(
        las.sanitize(line.strip('.').strip())
        for line in las.filter_lines(halves[0], ['-'])
    )
    if len(metadata['curveAliases']) < 1:
        return 'Improperly formatted metadata block'
    #Emit one row per mnemonic in the Version, Well and Curve metadata blocks
    for block in ['V', 'W', 'C']:
        if block in metadata:
            for mnemonic, val in metadata[block].iteritems():
                helper.emit('%s\t%s\t%s\t%s\t%s' %
                            (filename, block, mnemonic,
                             val.get('UOM', ''), val.get('description', '')))

helper.process_records(process_record, las.parse_filename, '__key')
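
#las is likewise a local module. These stand-ins record the interface the two
#LAS scripts assume, inferred from the call sites alone; the bodies are
#illustrative placeholders, not the repo's actual parser.

def parse_filename(text):
    #Assumed: the key line's first token is the source LAS file's name
    return text.split()[0]

def filter_lines(text, prefixes):
    #Drop blank lines and lines whose first non-blank character is in prefixes
    return [line for line in text.splitlines()
            if line.strip() and line.strip()[0] not in prefixes]

def sanitize(line):
    #Assumed to normalize a raw metadata line before parsing (placeholder)
    return line

#parse_metadata(lines) is assumed to return a dict shaped like
#  {'V': {mnemonic: {'UOM': ..., 'value': ..., 'description': ...}, ...},
#   'W': {...}, 'C': {...},
#   'curveAliases': [curve-block mnemonics, in column order]}
#matching the ~Version/~Well/~Curve blocks described in the spec linked above.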
#!./pyenv/bin/python
#Emit one row per non-null curve reading from the ~A data block of an LAS file;
#record splitting mirrors the metadata script above
import las
import recordhelper as helper

def get_nulls(metadata):
    #Assumed implementation: collect the null sentinel values the file declares
    #(the NULL mnemonic in the ~Well block) plus the common LAS default -999.25
    nulls = set([-999.25])
    try:
        nulls.add(float(metadata['W']['NULL'].get('value', '')))
    except (KeyError, ValueError):
        pass
    return nulls

def process_record(filename, record):
    if '~' not in record:
        return 'No proper start of record'
    halves = record[record.index('~'):].strip().split('~A')
    if len(halves) < 2:
        return 'Improperly separated metadata & data blocks'
    metadata = las.parse_metadata(
        las.sanitize(line.strip('.').strip())
        for line in las.filter_lines(halves[0], ['-'])
    )
    if len(metadata['curveAliases']) < 1:
        return 'Improperly formatted metadata block'
    try:
        #Drop the remainder of the ~A header line, keeping only the readings
        halves[1] = halves[1][halves[1].index('\n'):]
        #filter blank and lines starting with #, split resulting text into tokens
        tokens = '\t'.join(las.filter_lines(halves[1], ['#'])).split()
    except ValueError:
        return 'bad separation between metadata and curve data'
    if len(tokens) % len(metadata['curveAliases']) != 0:
        return 'mismatched reading count'
    null_vals = get_nulls(metadata)
    curve_aliases = metadata['curveAliases']
    step_type = curve_aliases[0]
    for idx in xrange(0, len(tokens), len(curve_aliases)):
        #idx is index of first reading on a step
        step_values = tokens[idx:idx + len(curve_aliases)]  #all readings for this step
        try:
            #Start idy at 1 so it indexes curve_aliases for the current column
            #even when null readings are skipped
            for idy, reading in enumerate(step_values[1:], start=1):
                if float(reading) in null_vals:
                    continue
                helper.emit('\t'.join([
                    filename, step_type, step_values[0], curve_aliases[idy],
                    metadata['C'][curve_aliases[idy]].get('UOM', ''), reading
                ]))
        except ValueError:
            pass

helper.process_records(process_record, las.parse_filename, '__key')
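
#!./pyenv/bin/python
#Hypothetical smoke test, not part of the repo: it pipes a made-up record into
#the curve script above (assumed saved as ./curves.py) using the stdin framing
#sketched with recordhelper earlier. Useful only as a sanity check of the
#expected output shape.
import subprocess

sample = '\n'.join([
    '__key well1.las',
    '~Version',
    'VERS.          2.0 : CWLS LOG ASCII STANDARD',
    '~Well',
    'NULL.      -999.25 : NULL VALUE',
    '~Curve',
    'DEPT.M             : DEPTH',
    'GR  .GAPI          : GAMMA RAY',
    '~A  DEPT  GR',
    '1001.0  55.2',
    '1001.5  -999.25',  #null reading, should be skipped
]) + '\n'

proc = subprocess.Popen(['./curves.py'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
out, _ = proc.communicate(sample)
#Expect a single row: 'well1.las\tDEPT\t1001.0\tGR\tGAPI\t55.2'
print out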
#!./pyenv/bin/python
from pyquery import PyQuery as pq
import recordhelper as helper

def extract_fields(fields, text):
    vals = []
    #Append extracted field value for each field in list of fields
    for field in fields:
        if field in text:
            vals.append(text.split(field + ': ')[1].split(' ')[0])
        else:
            vals.append('null')
    return vals

#Print tab separated list of fields concatted with array of extracted cell values
def emit(fields, cells):
    helper.emit('\t'.join(fields + [cell.text_content() for cell in cells]))

def process_rec(key, rec):
    if 'Average PSI' in rec:
        fields = extract_fields(['File No'], pq(rec).text())
        cells = pq(rec)('table').eq(2)('td')
        #Split the cells into 7-column rows and emit each as a record
        for record in [cells[x:x + 7] for x in xrange(0, len(cells), 7)]:
            emit(fields, record)

def parse_key(text):
    return text.split()[0]

helper.process_records(process_rec, parse_key, '__key')
#!./pyenv/bin/python
from pyquery import PyQuery as pq
import recordhelper as helper

def extract_fields(fields, text):
    vals = []
    #Append extracted field value for each field in list of fields,
    #treating non-breaking-space placeholders as null
    for field in fields:
        if field in text:
            vals.append(text.split(field + ': ')[1].split(' ')[0].replace(u'\xa0', 'null'))
        else:
            vals.append('null')
    return vals

#Print tab separated list of fields concatted with array of extracted cell values
def emit(fields, cells):
    helper.emit('\t'.join(fields + [cell.text_content() for cell in cells]))

def process_rec(key, rec):
    if 'Vent/Flare' in rec:
        fields = extract_fields(['File No', 'Perfs', 'Spacing', 'Total Depth'], pq(rec).text())
        cells = pq(rec)('table').eq(2)('td')
        #Split the cells into 9-column rows and emit each as a record
        for record in [cells[x:x + 9] for x in xrange(0, len(cells), 9)]:
            emit(fields, record)

def parse_key(text):
    return text.split()[0]

#Read all of input and run it through process_rec
helper.process_records(process_rec, parse_key, '__key')
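
#Example of what extract_fields pulls out of the flattened page text. The sample
#strings are hypothetical, shaped like the 'Field: value' pairs the pages carry:
#
#  >>> extract_fields(['File No', 'Perfs'], u'File No: 12345 Perfs: 8800-8900 Spacing: 160')
#  [u'12345', u'8800-8900']
#  >>> extract_fields(['Total Depth'], u'File No: 12345')
#  ['null']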