def write_segy_based_on(fh, segy_y_reader, data, trace_header_format=TraceHeaderRev1, endian='>'):
    """Write new (processed) data to a new SEG Y file, reusing the headers of
    an existing SEG Y file.

    Args:
        fh: A file-like object open for binary write.

        segy_y_reader: A SegYReader instance from which the SEG Y headers
            and other parameters can be retrieved.

        data: A numpy array containing trace samples, organised with the
            trace samples on the first axis and inline and crossline
            positions on the second and third axes respectively.

        trace_header_format: The class which defines the layout of the
            trace header. Defaults to TraceHeaderRev1.

        endian: Big endian by default.

    Raises:
        UnsupportedEncodingError: If the reader's encoding is neither ASCII
            nor EBCDIC.
    """
    # Fixes over the previous revision: the parameter was declared as 'hf'
    # while the body used 'fh'; the loop referenced an undefined name
    # 'reader'; and the packer was built from an undefined
    # 'trace_header_format' via 'HeaderPacker' rather than the
    # make_header_packer() helper used by the other writers in this module.
    encoding = segy_y_reader.encoding
    if not is_supported_encoding(encoding):
        raise UnsupportedEncodingError("Writing SEG Y", encoding)

    write_textual_reel_header(fh, segy_y_reader.textual_reel_header, encoding)
    write_binary_reel_header(fh, segy_y_reader.binary_reel_header, endian)
    write_extended_textual_headers(fh, segy_y_reader.extended_textual_header, encoding)

    trace_header_packer = make_header_packer(trace_header_format, endian)

    # Pair each (inline-number, crossline-number) with the corresponding
    # (inline-index, crossline-index) position into the data array. Both
    # products enumerate the grid in the same (row-major) order.
    il_xl = itertools.product(segy_y_reader.inline_range(), segy_y_reader.xline_range())
    i_j = itertools.product(range(segy_y_reader.num_inlines()), range(segy_y_reader.num_xlines()))

    for (inline_num, xline_num), (i, j) in zip(il_xl, i_j):
        idx = segy_y_reader.trace_index((inline_num, xline_num))
        write_trace_header(fh, segy_y_reader.trace_header(idx), trace_header_packer)
        write_trace_samples(fh, data[:, i, j], segy_y_reader.data_sample_format, endian=endian)
def write_segy(fh, dataset, encoding=None, trace_header_format=TraceHeaderRev1, endian='>', progress=None):
    """Write a SEG Y file from a dataset.

    Args:
        fh: A file-like object open for binary write, positioned to write
            the textual reel header.

        dataset: An object implementing the interface of
            segpy.dataset.Dataset, such as a SegYReader.

        encoding: Optional encoding for text data. Typically 'cp037' for
            EBCDIC or 'ascii' for ASCII. If omitted, the dataset object
            will be queried for an encoding property.

        trace_header_format: The class which defines the layout of the
            trace header. Defaults to TraceHeaderRev1.

        endian: Big endian by default.

        progress: A unary callable which will be passed a number between
            zero and one indicating the progress made. If provided, this
            callback will be invoked at least once with an argument equal
            to one.

    Raises:
        TypeError: If the supplied progress object is not callable.
        UnsupportedEncodingError: If the specified encoding is neither
            ASCII nor EBCDIC.
        UnicodeError: If textual data provided cannot be encoded into the
            required encoding.
    """
    progress_callback = progress if progress is not None else lambda p: None

    if not callable(progress_callback):
        raise TypeError("write_segy(): progress callback must be callable")

    encoding = encoding or (hasattr(dataset, 'encoding') and dataset.encoding) or ASCII

    if not is_supported_encoding(encoding):
        raise UnsupportedEncodingError("Writing SEG Y", encoding)

    write_textual_reel_header(fh, dataset.textual_reel_header, encoding)
    write_binary_reel_header(fh, dataset.binary_reel_header, endian)
    write_extended_textual_headers(fh, dataset.extended_textual_header, encoding)

    trace_header_packer = make_header_packer(trace_header_format, endian)

    num_traces = dataset.num_traces()
    # Count traces explicitly: the values yielded by trace_indexes() are
    # opaque keys and are not guaranteed to be consecutive integers starting
    # at zero, so they cannot be used directly as a progress numerator.
    for count, trace_index in enumerate(dataset.trace_indexes(), start=1):
        write_trace_header(fh, dataset.trace_header(trace_index), trace_header_packer)
        write_trace_samples(fh, dataset.trace_samples(trace_index), dataset.data_sample_format, endian=endian)
        progress_callback(count / num_traces)
    # Guarantee the documented post-condition even for an empty dataset.
    progress_callback(1)
def write_segy(fh, seg_y_data, encoding=None, trace_header_format=TraceHeaderRev1, endian='>', progress=None):
    """Write headers and traces from seg_y_data out as a SEG Y file.

    Args:
        fh: A file-like object open for binary write, positioned to write
            the textual reel header.

        seg_y_data: An object from which the headers and trace_samples data
            can be retrieved. Requires the following properties and methods:

              seg_y_data.textual_reel_header
              seg_y_data.binary_reel_header
              seg_y_data.extended_textual_header
              seg_y_data.trace_indexes
              seg_y_data.trace_header(trace_index)
              seg_y_data.trace_samples(trace_index)
              seg_y_data.encoding
              seg_y_data.endian

            One such legitimate object would be a SegYReader instance.

        trace_header_format: The class which defines the layout of the
            trace header. Defaults to TraceHeaderRev1.

        encoding: Optional encoding for text data. Typically 'cp037' for
            EBCDIC or 'ascii' for ASCII. If omitted, seg_y_data will be
            queried for an encoding property.

        endian: Big endian by default.

        progress: An optional progress bar object.

    Raises:
        UnsupportedEncodingError: If the specified encoding is neither
            ASCII nor EBCDIC.
        UnicodeError: If textual data provided cannot be encoded into the
            required encoding.
    """
    # Prefer the explicit argument, then the data source's own encoding,
    # finally falling back to ASCII.
    encoding = encoding or getattr(seg_y_data, 'encoding', None) or ASCII

    if not is_supported_encoding(encoding):
        raise UnsupportedEncodingError("Writing SEG Y", encoding)

    # The three file-level headers precede all trace data.
    write_textual_reel_header(fh, seg_y_data.textual_reel_header, encoding)
    write_binary_reel_header(fh, seg_y_data.binary_reel_header, endian)
    write_extended_textual_headers(fh, seg_y_data.extended_textual_header, encoding)

    # A single packer instance is reused for every trace header.
    packer = make_header_packer(trace_header_format, endian)
    for index in seg_y_data.trace_indexes():
        write_trace_header(fh, seg_y_data.trace_header(index), packer)
        write_trace_samples(fh, seg_y_data.trace_samples(index),
                            seg_y_data.data_sample_format, endian=endian)
def write_extended_textual_headers(fh, pages, encoding):
    """Write extended textual headers.

    Args:
        fh: A file-like object open in binary mode for writing.

        pages: An iterable series of sequences of Unicode strings, where
            the outer iterable represents 3200 byte pages, each comprised
            of a sequence of exactly 40 strings of nominally 80 characters
            each. Although Unicode strings are accepted, when encoded each
            should result in an exactly 80 byte sequence. To produce a
            valid data structure for pages, consider using
            format_extended_textual_header().

        encoding: Either 'cp037' for EBCDIC or 'ascii' for ASCII.

    Post-condition:
        The file pointer in fh will be positioned at the first byte after
        the extended textual headers, which is also the first byte of the
        first trace-header.

    Raises:
        ValueError: If the provided header data has the wrong shape.
        UnicodeError: If the textual data could not be encoded into the
            specified encoding.
    """
    if not is_supported_encoding(encoding):
        raise UnsupportedEncodingError("Writing extended textual header", encoding)

    # The extended headers always begin immediately after the binary reel header.
    fh.seek(REEL_HEADER_NUM_BYTES)

    # Validate and encode every page before writing anything, so a malformed
    # page cannot leave a partially written file behind.
    encoded_pages = []
    for page_index, page in enumerate(pages):
        encoded_page = []
        # TODO: Share some of this code with writing the textual reel header.
        for line_index, line in enumerate(page):
            encoded_line = line.encode(encoding)
            num_encoded_bytes = len(encoded_line)
            if num_encoded_bytes != CARD_LENGTH:
                raise ValueError(
                    "Extended textual header line {} of page {} at {} bytes is not "
                    "{} bytes".format(line_index, page_index, num_encoded_bytes, CARD_LENGTH))
            encoded_page.append(encoded_line)
        num_encoded_lines = len(encoded_page)
        if num_encoded_lines != CARDS_PER_HEADER:
            raise ValueError("Extended textual header page {} number of "
                             "lines {} is not {}".format(
                                 page_index, num_encoded_lines, CARDS_PER_HEADER))
        # Fixed: a stray trailing comma here previously turned this statement
        # into a discarded one-element tuple expression.
        encoded_pages.append(encoded_page)

    for encoded_page in encoded_pages:
        concatenated_page = EMPTY_BYTE_STRING.join(encoded_page)
        assert len(concatenated_page) == TEXTUAL_HEADER_NUM_BYTES
        fh.write(concatenated_page)
def write_segy(fh, seg_y_data, encoding=None, trace_header_format=TraceHeaderRev1, endian='>', progress=None):
    """Write a SEG Y file from the headers and traces held by seg_y_data.

    Args:
        fh: A file-like object open for binary write, positioned to write
            the textual reel header.

        seg_y_data: An object from which the headers and trace_samples data
            can be retrieved. Requires the following properties and methods:

              seg_y_data.textual_reel_header
              seg_y_data.binary_reel_header
              seg_y_data.extended_textual_header
              seg_y_data.trace_indexes
              seg_y_data.trace_header(trace_index)
              seg_y_data.trace_samples(trace_index)
              seg_y_data.encoding
              seg_y_data.endian
              seg_y_data.data_sample_format

            One such legitimate object would be a SegYReader instance.

        trace_header_format: The class which defines the layout of the
            trace header. Defaults to TraceHeaderRev1.

        encoding: Optional encoding for text data. Typically 'cp037' for
            EBCDIC or 'ascii' for ASCII. If omitted, seg_y_data will be
            queried for an encoding property.

        endian: Big endian by default.

        progress: An optional progress bar object.

    Raises:
        UnsupportedEncodingError: If the specified encoding is neither
            ASCII nor EBCDIC.
        UnicodeError: If textual data provided cannot be encoded into the
            required encoding.
    """
    # Resolve the text encoding: explicit argument, then the source object's
    # own encoding attribute, then ASCII as a last resort.
    encoding = encoding or (hasattr(seg_y_data, 'encoding') and seg_y_data.encoding) or ASCII
    if not is_supported_encoding(encoding):
        raise UnsupportedEncodingError("Writing SEG Y", encoding)

    # File-level headers are written before any trace data.
    write_textual_reel_header(fh, seg_y_data.textual_reel_header, encoding)
    write_binary_reel_header(fh, seg_y_data.binary_reel_header, endian)
    write_extended_textual_headers(fh, seg_y_data.extended_textual_header, encoding)

    header_packer = make_header_packer(trace_header_format, endian)
    sample_format = seg_y_data.data_sample_format
    for tidx in seg_y_data.trace_indexes():
        write_trace_header(fh, seg_y_data.trace_header(tidx), header_packer)
        write_trace_samples(fh, seg_y_data.trace_samples(tidx), sample_format, endian=endian)
def write_segy(fh, dataset, encoding=None, trace_header_format=TraceHeaderRev1, endian='>', progress=None):
    """Write a SEG Y file from a dataset, reporting progress.

    Args:
        fh: A file-like object open for binary write, positioned to write
            the textual reel header.

        dataset: An object implementing the interface of
            segpy.dataset.Dataset, such as a SegYReader.

        encoding: Optional encoding for text data. Typically 'cp037' for
            EBCDIC or 'ascii' for ASCII. If omitted, the dataset object
            will be queried for an encoding property.

        trace_header_format: The class which defines the layout of the
            trace header. Defaults to TraceHeaderRev1.

        endian: Big endian by default.

        progress: A unary callable which will be passed a number between
            zero and one indicating the progress made. If provided, this
            callback will be invoked at least once with an argument equal
            to one.

    Raises:
        TypeError: If the supplied progress object is not callable.
        UnsupportedEncodingError: If the specified encoding is neither
            ASCII nor EBCDIC.
        UnicodeError: If textual data provided cannot be encoded into the
            required encoding.
    """
    progress_callback = progress if progress is not None else lambda p: None

    if not callable(progress_callback):
        raise TypeError("write_segy(): progress callback must be callable")

    encoding = encoding or (hasattr(dataset, 'encoding') and dataset.encoding) or ASCII

    if not is_supported_encoding(encoding):
        raise UnsupportedEncodingError("Writing SEG Y", encoding)

    write_textual_reel_header(fh, dataset.textual_reel_header, encoding)
    write_binary_reel_header(fh, dataset.binary_reel_header, endian)
    write_extended_textual_headers(fh, dataset.extended_textual_header, encoding)

    trace_header_packer = make_header_packer(trace_header_format, endian)

    num_traces = dataset.num_traces()
    # Count traces explicitly: the values yielded by trace_indexes() are
    # opaque keys and are not guaranteed to be consecutive integers starting
    # at zero, so they cannot be used directly as a progress numerator.
    for count, trace_index in enumerate(dataset.trace_indexes(), start=1):
        write_trace_header(fh, dataset.trace_header(trace_index), trace_header_packer)
        write_trace_samples(fh, dataset.trace_samples(trace_index), dataset.data_sample_format, endian=endian)
        progress_callback(count / num_traces)
    # Guarantee the documented post-condition even for an empty dataset.
    progress_callback(1)
def write_extended_textual_headers(fh, pages, encoding):
    """Write extended textual headers.

    Args:
        fh: A file-like object open in binary mode for writing.

        pages: An iterable series of sequences of Unicode strings, where
            the outer iterable represents 3200 byte pages, each comprised
            of a sequence of exactly 40 strings of nominally 80 characters
            each. Although Unicode strings are accepted, when encoded each
            should result in an exactly 80 byte sequence. To produce a
            valid data structure for pages, consider using
            format_extended_textual_header().

        encoding: Either 'cp037' for EBCDIC or 'ascii' for ASCII.

    Post-condition:
        The file pointer in fh will be positioned at the first byte after
        the extended textual headers, which is also the first byte of the
        first trace-header.

    Raises:
        ValueError: If the provided header data has the wrong shape.
        UnicodeError: If the textual data could not be encoded into the
            specified encoding.
    """
    if not is_supported_encoding(encoding):
        raise UnsupportedEncodingError("Writing extended textual header", encoding)

    # The extended headers always begin immediately after the binary reel header.
    fh.seek(REEL_HEADER_NUM_BYTES)

    # Validate and encode every page before writing anything, so a malformed
    # page cannot leave a partially written file behind.
    encoded_pages = []
    for page_index, page in enumerate(pages):
        encoded_page = []
        # TODO: Share some of this code with writing the textual reel header.
        for line_index, line in enumerate(page):
            encoded_line = line.encode(encoding)
            num_encoded_bytes = len(encoded_line)
            if num_encoded_bytes != CARD_LENGTH:
                raise ValueError("Extended textual header line {} of page {} at {} bytes is not "
                                 "{} bytes".format(line_index, page_index, num_encoded_bytes, CARD_LENGTH))
            encoded_page.append(encoded_line)
        num_encoded_lines = len(encoded_page)
        if num_encoded_lines != CARDS_PER_HEADER:
            # Fixed: the message has three placeholders but page_index was
            # previously omitted from format(), so raising this error raised
            # IndexError instead of the intended ValueError.
            raise ValueError("Extended textual header page {} number of "
                             "lines {} is not {}".format(page_index, num_encoded_lines, CARDS_PER_HEADER))
        encoded_pages.append(encoded_page)

    for encoded_page in encoded_pages:
        concatenated_page = EMPTY_BYTE_STRING.join(encoded_page)
        assert len(concatenated_page) == TEXTUAL_HEADER_NUM_BYTES
        fh.write(concatenated_page)
def write_textual_reel_header(fh, lines, encoding):
    """Write the SEG Y card image header, also known as the textual header.

    Args:
        fh: A file-like object open in binary mode positioned such that the
            beginning of the textual header will be the next byte to read.

        lines: An iterable series of forty lines, each of which must be a
            Unicode string of CARD_LENGTH characters. The first three
            characters of each line are often "C 1" to "C40" (as required
            by the SEG Y standard) although this is not enforced by this
            function, since many widespread SEG Y readers and writers do
            not adhere to this constraint. To produce a SEG Y compliant
            series of header lines consider using the
            format_standard_textual_header() function.

            Any lines longer than CARD_LENGTH characters will be truncated
            without warning. Any excess lines over CARDS_PER_HEADER will be
            discarded. Short or omitted lines will be padded with spaces.

        encoding: Typically 'cp037' for EBCDIC or 'ascii' for ASCII.

    Post-condition:
        The file pointer in fh will be positioned at the first byte
        following the textual header.

    Raises:
        UnsupportedEncodingError: If encoding is neither EBCDIC nor ASCII.
        UnicodeError: If the data provided in lines cannot be encoded with
            the encoding.
    """
    if not is_supported_encoding(encoding):
        raise UnsupportedEncodingError("Writing textual reel header", encoding)

    # The textual header occupies the very start of the file.
    fh.seek(0)

    # Encode each line, padding short lines with encoded spaces and
    # truncating long ones, so every card is exactly CARD_LENGTH bytes.
    padded_lines = [line.encode(encoding).ljust(CARD_LENGTH, ' '.encode(encoding))[:CARD_LENGTH]
                    for line in pad(lines, padding='', size=CARDS_PER_HEADER)]
    joined_header = EMPTY_BYTE_STRING.join(padded_lines)
    # Use the named constant rather than the previous magic number 3200,
    # matching the asserts used elsewhere in this module.
    assert len(joined_header) == TEXTUAL_HEADER_NUM_BYTES
    fh.write(joined_header)
    fh.seek(TEXTUAL_HEADER_NUM_BYTES)
def write_segy(fh, dataset, encoding=None, trace_header_format=TraceHeaderRev1, endian='>', progress=None):
    """Serialize a dataset to fh in SEG Y format.

    Args:
        fh: A file-like object open for binary write, positioned to write
            the textual reel header.

        dataset: An object implementing the interface of
            segpy.dataset.Dataset, such as a SegYReader.

        encoding: Optional encoding for text data. Typically 'cp037' for
            EBCDIC or 'ascii' for ASCII. If omitted, the dataset object
            will be queried for an encoding property.

        trace_header_format: The class which defines the layout of the
            trace header. Defaults to TraceHeaderRev1.

        endian: Big endian by default.

        progress: An optional progress bar object.

    Raises:
        UnsupportedEncodingError: If the specified encoding is neither
            ASCII nor EBCDIC.
        UnicodeError: If textual data provided cannot be encoded into the
            required encoding.
    """
    # Use the caller's encoding if given, otherwise the dataset's own,
    # otherwise fall back to ASCII.
    encoding = encoding or (hasattr(dataset, 'encoding') and dataset.encoding) or ASCII
    if not is_supported_encoding(encoding):
        raise UnsupportedEncodingError("Writing SEG Y", encoding)

    # Emit the three file-level headers in their mandated order.
    write_textual_reel_header(fh, dataset.textual_reel_header, encoding)
    write_binary_reel_header(fh, dataset.binary_reel_header, endian)
    write_extended_textual_headers(fh, dataset.extended_textual_header, encoding)

    # Then each trace: a packed header followed by its samples.
    packer = make_header_packer(trace_header_format, endian)
    sample_format = dataset.data_sample_format
    for trace_index in dataset.trace_indexes():
        write_trace_header(fh, dataset.trace_header(trace_index), packer)
        write_trace_samples(fh, dataset.trace_samples(trace_index), sample_format, endian=endian)
def format_extended_textual_header(text, encoding, include_text_stop=False):
    """Format a string into pages and lines suitable for an extended textual header.

    Args:
        text: An arbitrary text string. Any universal newlines will be
            preserved.

        encoding: Either ASCII ('ascii') or EBCDIC ('cp037').

        include_text_stop: If True, a text stop stanza header will be
            appended, otherwise not.

    Returns:
        A list of pages, each a sequence of CARDS_PER_HEADER formatted
        lines.

    Raises:
        UnsupportedEncodingError: If the encoding is neither ASCII nor
            EBCDIC.
    """
    if not is_supported_encoding(encoding):
        raise UnsupportedEncodingError("Extended textual header", encoding)

    # According to the standard: "The Extended Textual File Header consists of one or more 3200-byte records, each
    # record containing 40 lines of textual card-image text."  It goes on "... Each line in an Extended Textual File
    # Header ends in carriage return and linefeed (EBCDIC 0D25 or ASCII 0D0A)."  Given that we're dealing with fixed-
    # length (80 byte) lines, this implies that we have 78 bytes of space into which we can encode the content of each
    # line, which must be left-justified and padded with spaces.
    width = CARD_LENGTH - len(HEADER_NEWLINE)

    original_lines = text.splitlines()

    # Split overly long lines (i.e. > 78) and pad too-short lines with spaces.
    lines = []
    for original_line in original_lines:
        if not original_line:
            # Fixed: an empty source line must still occupy one blank card;
            # the range() below yields nothing for zero-length lines, which
            # previously dropped blank lines and shifted later lines up.
            lines.append(pad_and_terminate_header_line('', width))
            continue
        padded_lines = (pad_and_terminate_header_line(original_line[i:i + width], width)
                        for i in range(0, len(original_line), width))
        lines.extend(padded_lines)

    pages = list(batched(lines, CARDS_PER_HEADER, pad_and_terminate_header_line('', width)))

    if include_text_stop:
        stop_page = format_extended_textual_header(END_TEXT_STANZA, encoding)[0]
        pages.append(stop_page)

    return pages
def format_extended_textual_header(text, encoding, include_text_stop=False):
    """Break a text string into pages and lines suitable for an extended textual header.

    Args:
        text: An arbitrary text string. Any universal newlines will be
            preserved.

        encoding: Either ASCII ('ascii') or EBCDIC ('cp037').

        include_text_stop: If True, a text stop stanza header will be
            appended, otherwise not.

    Raises:
        UnsupportedEncodingError: If the encoding is neither ASCII nor
            EBCDIC.
    """
    if not is_supported_encoding(encoding):
        raise UnsupportedEncodingError("Extended textual header", encoding)

    # According to the standard: "The Extended Textual File Header consists of one or more 3200-byte records, each
    # record containing 40 lines of textual card-image text."  It goes on "... Each line in an Extended Textual File
    # Header ends in carriage return and linefeed (EBCDIC 0D25 or ASCII 0D0A)."  Given that we're dealing with fixed-
    # length (80 byte) lines, this implies that we have 78 bytes of space into which we can encode the content of each
    # line, which must be left-justified and padded with spaces.
    width = CARD_LENGTH - len(HEADER_NEWLINE)

    # Chop each source line into width-sized chunks and pad each chunk out
    # to a full terminated card.
    lines = [pad_and_terminate_header_line(source_line[offset:offset + width], width)
             for source_line in text.splitlines()
             for offset in range(0, len(source_line), width)]

    blank_card = pad_and_terminate_header_line('', width)
    pages = list(batched(lines, 40, blank_card))

    if include_text_stop:
        pages.append(format_extended_textual_header(END_TEXT_STANZA, encoding)[0])

    return pages
def test_unsupported_encoding_raises_unsupported_encoding_error(self, encoding):
    """Writing extended textual headers with an unsupported encoding must fail."""
    assume(not is_supported_encoding(encoding))
    unused_fh = None
    empty_pages = []
    with raises(UnsupportedEncodingError):
        toolkit.write_extended_textual_headers(unused_fh, empty_pages, encoding)
def test_unsupported_encoding_raises_unsupported_encoding_error(self, encoding):
    """Formatting an extended textual header with an unsupported encoding must fail."""
    assume(not is_supported_encoding(encoding))
    empty_text = ''
    with raises(UnsupportedEncodingError):
        toolkit.format_extended_textual_header(empty_text, encoding)
def test_unsupported_encoding_raises_unsupported_encoding_error(self, encoding):
    """An unsupported encoding is rejected before any header data is written."""
    # Only exercise encodings outside the supported set.
    assume(not is_supported_encoding(encoding))
    with raises(UnsupportedEncodingError):
        toolkit.write_extended_textual_headers(None, [], encoding)
def test_unsupported_encoding_raises_unsupported_encoding_error(self, encoding):
    """An unsupported encoding is rejected before any formatting happens."""
    # Only exercise encodings outside the supported set.
    assume(not is_supported_encoding(encoding))
    with raises(UnsupportedEncodingError):
        toolkit.format_extended_textual_header('', encoding)