def create_reader(
        fh,
        encoding=None,
        trace_header_format=TraceHeaderRev1,
        endian='>',
        progress=None,
        cache_directory=None,
        dim=None):
    """Create a SegYReader based on performing a scan of SEG Y data.

    This function is the preferred method for creating SegYReader
    objects. It reads basic header information and attempts to build
    indexes for traces, CDP numbers (for 2D surveys), and inline and
    cross line co-ordinates (for 3D surveys) to facilitate subsequent
    random-access to traces.

    Args:
        fh: A file-like-object open in binary mode positioned such that the
            beginning of the reel header will be the next byte to be read.
            For disk-based SEG Y files, this is the beginning of the file.
            When caching is enabled the object must also expose a ``name``
            attribute, because the cache file name is derived from it.

        encoding: An optional text encoding for the textual headers. If
            None (the default) a heuristic will be used to guess the
            header encoding.

        trace_header_format: An optional class defining the layout of the
            trace header. Defaults to TraceHeaderRev1.

        endian: '>' for big-endian data (the standard and default), '<'
            for little-endian (non-standard)

        progress: A unary callable which will be passed a number
            between zero and one indicating the progress made. If
            provided, this callback will be invoked at least once with
            an argument equal to one, including when the reader is
            restored from the cache.

        cache_directory: The directory for the cache file. The cache file
            is ``<cache_directory>/<SEG Y base name without extension>.pkl``
            and the directory path is used exactly as given. If
            cache_directory is None (the default), caching is disabled.

        dim: Sometimes the CDP catalog and the line catalog are not
            correct indicators of 2D or 3D data. When dim is not None it
            is forwarded to the reader construction to force the data to
            be treated as 2D (dim=2) or 3D (dim=3). When dim is None the
            dimension is inferred automatically from the data.

    Raises:
        TypeError: ``fh`` is open in text mode or is not seekable.

        ValueError: ``fh`` is closed, the file is shorter than a reel
            header, or ``endian`` is neither '<' nor '>'.

    Returns:
        A SegYReader object. Depending on the exact type of the
        SegYReader returned different capabilities may be
        available. Inspect the returned object to determine these
        capabilities, or be prepared for capabilities not defined in
        the SegYReader base class to be unavailable.  The underlying
        file-like object must remain open for the duration of use of
        the returned reader object. It is the caller's responsibility
        to close the underlying file.
    """
    if hasattr(fh, 'encoding') and fh.encoding is not None:
        raise TypeError(
            "SegYReader must be provided with a binary mode file object")

    if not fh.seekable():
        raise TypeError(
            "SegYReader must be provided with a seekable file object")

    if fh.closed:
        raise ValueError(
            "SegYReader must be provided with an open file object")

    num_file_bytes = file_length(fh)
    if num_file_bytes < REEL_HEADER_NUM_BYTES:
        raise ValueError(
            "SEG Y file {!r} of {} bytes is too short".format(
                filename_from_handle(fh),
                num_file_bytes))

    if endian not in ('<', '>'):
        raise ValueError("Unrecognised endian value {!r}".format(endian))

    reader = None
    cache_file_path = None

    if cache_directory is not None:
        seg_y_path = filename_from_handle(fh)
        # NOTE(review): the cache file is keyed on the SEG Y base name only;
        # encoding, trace_header_format, endian and dim do not take part in
        # the file name, so a cache written with other settings will be
        # reused. Confirm this is intended.
        stem, _ = os.path.splitext(os.path.basename(fh.name))
        cache_file_path = os.path.join(cache_directory, stem + '.pkl')
        if os.path.exists(cache_file_path):
            reader = _load_reader_from_cache(cache_file_path, seg_y_path)
            if reader is not None:
                print('reader.py, reader loaded from {}'.format(cache_file_path))
                # No scan takes place on this path, so honour the documented
                # "invoked at least once with one" guarantee here.
                if progress is not None:
                    progress(1)

    if reader is None:
        reader = _make_reader(fh, encoding, trace_header_format, endian, progress, dim=dim)
        if cache_directory is not None:
            _save_reader_to_cache(reader, cache_file_path)
            print('reader.py, reader cached to {}'.format(cache_file_path))

    return reader
def create_reader(
        fh,
        encoding=None,
        trace_header_format=TraceHeaderRev1,
        endian='>',
        progress=None,
        cache_directory=".segpy",
        dimensionality=None):
    """Create a SegYReader based on performing a scan of SEG Y data.

    This function is the preferred method for creating SegYReader
    objects. It reads basic header information and attempts to build
    indexes for traces, CDP numbers (for 2D surveys), and inline and
    cross line co-ordinates (for 3D surveys) to facilitate subsequent
    random-access to traces.

    Args:
        fh: A file-like-object open in binary mode positioned such that the
            beginning of the reel header will be the next byte to be read.
            For disk-based SEG Y files, this is the beginning of the file.

        encoding: An optional text encoding for the textual headers. If
            None (the default) a heuristic will be used to guess the
            header encoding.

        trace_header_format: An optional class defining the layout of the
            trace header. Defaults to TraceHeaderRev1.

        endian: '>' for big-endian data (the standard and default), '<'
            for little-endian (non-standard)

        progress: A unary callable which will be passed a number
            between zero and one indicating the progress made. If
            provided, this callback will be invoked at least once with
            an argument equal to one.

        cache_directory: The directory for the cache file. Relative paths
            are interpreted as being relative to the directory containing
            the SEG Y file. Absolute paths are used as is. If
            cache_directory is None, caching is disabled.

        dimensionality: An optional integer to force the dimensionality of
            the created reader. Accepted values are None, 1, 2 and 3. If
            None (the default) various heuristics will be used to guess
            the dimensionality of the data.

    Raises:
        TypeError: ``fh`` is open in text mode or is not seekable, or
            ``progress`` is neither None nor callable.

        ValueError: ``fh`` is closed, the file is shorter than a reel
            header, ``endian`` is neither '<' nor '>', or
            ``dimensionality`` is not one of None, 1, 2 or 3.

    Returns:
        A SegYReader object. Depending on the exact type of the
        SegYReader returned different capabilities may be
        available. Inspect the returned object to determine these
        capabilities, or be prepared for capabilities not defined in
        the SegYReader base class to be unavailable.  The underlying
        file-like object must remain open for the duration of use of
        the returned reader object. It is the caller's responsibility
        to close the underlying file.
    """
    if hasattr(fh, 'encoding') and fh.encoding is not None:
        raise TypeError(
            "SegYReader must be provided with a binary mode file object")

    if not fh.seekable():
        raise TypeError(
            "SegYReader must be provided with a seekable file object")

    if fh.closed:
        raise ValueError(
            "SegYReader must be provided with an open file object")

    num_file_bytes = file_length(fh)
    if num_file_bytes < REEL_HEADER_NUM_BYTES:
        raise ValueError(
            "SEG Y file {!r} of {} bytes is too short".format(
                filename_from_handle(fh),
                num_file_bytes))

    if endian not in ('<', '>'):
        raise ValueError("Unrecognised endian value {!r}".format(endian))

    progress_callback = progress if progress is not None else lambda p: None

    if not callable(progress_callback):
        raise TypeError("create_reader(): progress callback must be callable")

    if dimensionality not in (None, 1, 2, 3):
        raise ValueError(
            "dimensionality {!r} is not one of 1, 2, 3 or None.".format(dimensionality))

    reader = None
    cache_file_path = None

    if cache_directory is not None:
        # NOTE(review): dimensionality is not passed to hash_for_file, so it
        # presumably does not influence which cache entry is chosen - confirm.
        sha1 = hash_for_file(fh, encoding, trace_header_format, endian)
        seg_y_path = filename_from_handle(fh)
        cache_file_path = _locate_cache_file(seg_y_path, cache_directory, sha1)
        if cache_file_path is not None:
            reader = _load_reader_from_cache(cache_file_path, seg_y_path)

    if reader is None:
        reader = _make_reader(
            fh, encoding, trace_header_format, endian, progress_callback, dimensionality)
        # cache_file_path is None both when caching is disabled and when no
        # cache location could be determined; there is nowhere to save to in
        # either case.
        if cache_file_path is not None:
            _save_reader_to_cache(reader, cache_file_path)

    # Guarantees the final notification even when the reader came from cache.
    progress_callback(1)
    return reader
def catalog_traces(fh, bps, trace_header_format=TraceHeaderRev1, endian='>', progress=None):
    """Build catalogs to facilitate random access to trace_samples data.

    Note:
        This function can take significant time to run, proportional
        to the number of traces in the SEG Y file.

    Four catalogs will be built:

     1. A catalog mapping trace_samples index (0-based) to the position of
        that trace_samples header in the file.

     2. A catalog mapping trace_samples index (0-based) to the number of
        samples in that trace_samples.

     3. A catalog mapping CDP number to the trace_samples index.

     4. A catalog mapping an (inline, crossline) number 2-tuple to
        trace_samples index. If that catalog cannot be built, a catalog
        keyed on (file_sequence_num, ensemble_num) is used instead.

    Args:
        fh: A file-like-object open in binary mode, positioned at the
            start of the first trace_samples header.

        bps: The number of bytes per sample, such as obtained by a call
            to bytes_per_sample()

        trace_header_format: The class defining the trace header format.
            Defaults to TraceHeaderRev1.

        endian: '>' for big-endian data (the standard and default), '<'
            for little-endian (non-standard)

        progress: A unary callable which will be passed a number
            between zero and one indicating the progress made. If
            provided, this callback will be invoked at least once with
            an argument equal to 1

    Raises:
        TypeError: ``progress`` is neither None nor callable.

    Returns:
        A 4-tuple of the form (trace_samples-offset-catalog,
                               trace_samples-length-catalog,
                               cdp-catalog,
                               line-catalog)
        where each catalog is the result of ``CatalogBuilder.create()``,
        or None if no catalog could be built.
    """
    progress_callback = progress if progress is not None else lambda p: None

    if not callable(progress_callback):
        raise TypeError("catalog_traces(): progress callback must be callable")

    # Only the header fields needed for cataloguing are unpacked.
    class CatalogSubFormat(metaclass=SubFormatMeta,
                           parent_format=trace_header_format,
                           parent_field_names=(
                               'file_sequence_num',
                               'ensemble_num',
                               'num_samples',
                               'inline_number',
                               'crossline_number',
                           )):
        pass

    trace_header_packer = make_header_packer(CatalogSubFormat, endian)

    length = file_length(fh)
    pos_begin = fh.tell()

    trace_offset_catalog_builder = CatalogBuilder()
    trace_length_catalog_builder = CatalogBuilder()
    line_catalog_builder = CatalogBuilder()
    alt_line_catalog_builder = CatalogBuilder()
    cdp_catalog_builder = CatalogBuilder()

    for trace_number in count():
        # Clamp the position: a truncated final trace can push the computed
        # offset past the end of the file. An empty file counts as fully read.
        fraction_read = min(pos_begin, length) / length if length else 1.0
        progress_callback(_READ_PROPORTION * fraction_read)

        fh.seek(pos_begin)
        data = fh.read(TRACE_HEADER_NUM_BYTES)
        if len(data) < TRACE_HEADER_NUM_BYTES:
            # No complete trace header left: end of the trace data.
            break
        trace_header = trace_header_packer.unpack(data)

        num_samples = trace_header.num_samples
        trace_length_catalog_builder.add(trace_number, num_samples)
        samples_bytes = num_samples * bps
        trace_offset_catalog_builder.add(trace_number, pos_begin)
        # Should we check the data actually exists?
        line_catalog_builder.add((trace_header.inline_number,
                                  trace_header.crossline_number),
                                 trace_number)
        alt_line_catalog_builder.add((trace_header.file_sequence_num,
                                      trace_header.ensemble_num),
                                     trace_number)
        cdp_catalog_builder.add(trace_header.ensemble_num, trace_number)
        pos_begin = pos_begin + TRACE_HEADER_NUM_BYTES + samples_bytes

    progress_callback(_READ_PROPORTION)

    # The remaining (1 - _READ_PROPORTION) of the progress range is shared
    # equally between the four catalog builds, so reported values never
    # exceed one and never decrease.
    build_proportion = 1 - _READ_PROPORTION

    trace_offset_catalog = trace_offset_catalog_builder.create()
    progress_callback(_READ_PROPORTION + build_proportion / 4)

    trace_length_catalog = trace_length_catalog_builder.create()
    progress_callback(_READ_PROPORTION + build_proportion / 2)

    cdp_catalog = cdp_catalog_builder.create()
    progress_callback(_READ_PROPORTION + build_proportion * 3 / 4)

    line_catalog = line_catalog_builder.create()
    if line_catalog is None:
        # Some 3D files put Inline and Crossline numbers in
        # (TraceSequenceFile, cdp) pair
        line_catalog = alt_line_catalog_builder.create()

    progress_callback(1)

    return (trace_offset_catalog,
            trace_length_catalog,
            cdp_catalog,
            line_catalog)
def create_writer(
    fh, encoding=None, trace_header_format=TraceHeaderRev1, endian=">", progress=None, cache_directory=None, fast=False
):
    """Create a SegYWriter (or one of its subclasses) based on performing
    a scan of SEG Y data.

    This function is the preferred method for creating SegYWriter
    objects. It reads basic header information and attempts to build
    indexes for traces, CDP numbers (for 2D surveys), and inline and
    cross line co-ordinates (for 3D surveys) to facilitate subsequent
    random-access to traces.

    Args:
        fh: A file-like-object open in binary mode positioned such that the
            beginning of the reel header will be the next byte to be read.
            For disk-based SEG Y files, this is the beginning of the file.

        encoding: An optional text encoding for the textual headers. If
            None (the default) a heuristic will be used to guess the
            header encoding.

        trace_header_format: An optional class defining the layout of the
            trace header. Defaults to TraceHeaderRev1.

        endian: '>' for big-endian data (the standard and default), '<'
            for little-endian (non-standard)

        progress: A unary callable which will be passed a number
            between zero and one indicating the progress made. If
            provided, this callback will be invoked at least once with
            an argument equal to one, including when the object is
            restored from the cache.

        cache_directory: The directory for the cache file. Relative paths
            are interpreted as being relative to the directory containing
            the SEG Y file. Absolute paths are used as is. If
            cache_directory is None, caching is disabled.

        fast: Boolean flag to try a quick fixed length catalog before
            inline or CDP catalogs.

    Raises:
        TypeError: ``fh`` is open in text mode or is not seekable.

        ValueError: ``fh`` is closed, the file is shorter than a reel
            header, or ``endian`` is neither '<' nor '>'.

    Returns:
        A SegYWriter object. Depending on the exact type of the
        SegYWriter returned different capabilities may be
        available. Inspect the returned object to determine these
        capabilities, or be prepared for capabilities not defined in
        the SegYWriter base class to be unavailable.  The underlying
        file-like object must remain open for the duration of use of
        the returned writer object. It is the caller's responsibility
        to close the underlying file.

    Example:
        NOTE(review): this example was inherited from create_reader; the
        file mode required by a writer and the methods it offers should
        be confirmed before relying on it.

        with open('my_seismic_data.sgy', 'rb') as fh:
            reader = create_reader(fh)
            print(reader.num_traces())
    """
    if hasattr(fh, "encoding") and fh.encoding is not None:
        raise TypeError("SegYWriter must be provided with a binary mode file object")

    if not fh.seekable():
        raise TypeError("SegYWriter must be provided with a seekable file object")

    if fh.closed:
        raise ValueError("SegYWriter must be provided with an open file object")

    num_file_bytes = file_length(fh)
    if num_file_bytes < REEL_HEADER_NUM_BYTES:
        raise ValueError("SEG Y file {!r} of {} bytes is too short".format(filename_from_handle(fh), num_file_bytes))

    if endian not in ("<", ">"):
        raise ValueError("Unrecognised endian value {!r}".format(endian))

    writer = None
    cache_file_path = None

    if cache_directory is not None:
        # NOTE(review): `fast` is not passed to hash_for_file, so it
        # presumably does not influence which cache entry is chosen - confirm.
        sha1 = hash_for_file(fh, encoding, trace_header_format, endian)
        seg_y_path = filename_from_handle(fh)
        cache_file_path = _locate_cache_file(seg_y_path, cache_directory, sha1)
        if cache_file_path is not None:
            # NOTE(review): the cache helpers are the reader ones; confirm
            # that the cached object is a writer when produced by this
            # function.
            writer = _load_reader_from_cache(cache_file_path, seg_y_path)
            # No scan takes place on a cache hit, so honour the documented
            # "invoked at least once with one" guarantee here.
            if writer is not None and progress is not None:
                progress(1)

    if writer is None:
        writer = _make_writer(fh, encoding, trace_header_format, endian, progress, fast=fast)
        # cache_file_path is None both when caching is disabled and when no
        # cache location could be determined; there is nowhere to save to in
        # either case.
        if cache_file_path is not None:
            _save_reader_to_cache(writer, cache_file_path)

    return writer
def create_reader(fh, encoding=None, endian='>', progress=None):
    """Create a SegYReader (or one of its subclasses) based on performing
    a scan of SEG Y data.

    This function is the preferred method for creating SegYReader
    objects. It reads basic header information and attempts to build
    indexes for traces, CDP numbers (for 2D surveys), and inline and
    cross line co-ordinates (for 3D surveys) to facilitate subsequent
    random-access to traces.

    Args:
        fh: A file-like-object open in binary mode positioned such that the
            beginning of the reel header will be the next byte to be read.
            For disk-based SEG Y files, this is the beginning of the file.

        encoding: An optional text encoding for the textual headers. If
            None (the default) a heuristic will be used to guess the
            header encoding, falling back to ASCII when the heuristic
            gives no answer.

        endian: '>' for big-endian data (the standard and default), '<'
            for little-endian (non-standard)

        progress: A unary callable which will be passed a number
            between zero and one indicating the progress made. If
            provided, this callback will be invoked at least once with
            an argument equal to one.

    Raises:
        TypeError: ``fh`` is open in text mode or is not seekable.

        ValueError: ``fh`` is closed, the file is shorter than a reel
            header, or ``endian`` is neither '<' nor '>'.

    Returns:
        A SegYReader object. A SegYReader2D is returned when only a CDP
        catalog could be built, a SegYReader3D when only a line catalog
        could be built, and a plain SegYReader otherwise. Inspect the
        returned object to determine its capabilities, or be prepared
        for capabilities not defined in the SegYReader base class to be
        unavailable.  The underlying file-like object must remain open
        for the duration of use of the returned reader object. It is the
        caller's responsibility to close the underlying file.

    Example:

        with open('my_seismic_data.sgy', 'rb') as fh:
            reader = create_reader(fh)
            print(reader.num_traces())
    """
    if hasattr(fh, 'encoding') and fh.encoding is not None:
        raise TypeError(
            "SegYReader must be provided with a binary mode file object")

    if not seekable(fh):
        raise TypeError(
            "SegYReader must be provided with a seekable file object")

    if fh.closed:
        raise ValueError(
            "SegYReader must be provided with an open file object")

    num_file_bytes = file_length(fh)
    if num_file_bytes < REEL_HEADER_NUM_BYTES:
        raise ValueError(
            "SEG Y file {!r} of {} bytes is too short".format(
                filename_from_handle(fh),
                num_file_bytes))

    if endian not in ('<', '>'):
        raise ValueError("Unrecognised endian value {!r}".format(endian))

    if encoding is None:
        encoding = guess_textual_header_encoding(fh)

    if encoding is None:
        encoding = ASCII

    textual_reel_header = read_textual_reel_header(fh, encoding)
    binary_reel_header = read_binary_reel_header(fh, endian)
    extended_textual_header = read_extended_textual_headers(fh, binary_reel_header, encoding)
    revision = extract_revision(binary_reel_header)
    bps = bytes_per_sample(binary_reel_header, revision)

    # endian and progress are passed by keyword: catalog_traces takes
    # trace_header_format as its third positional parameter, so passing
    # them positionally would bind endian to the header format.
    trace_offset_catalog, trace_length_catalog, cdp_catalog, line_catalog = catalog_traces(
        fh, bps, endian=endian, progress=progress)

    # NOTE(review): another create_reader variant in this file passes
    # trace_header_format to the reader constructors before encoding;
    # confirm which constructor signature is current.
    if cdp_catalog is not None and line_catalog is None:
        return SegYReader2D(fh, textual_reel_header, binary_reel_header, extended_textual_header,
                            trace_offset_catalog, trace_length_catalog, cdp_catalog, encoding, endian)

    if cdp_catalog is None and line_catalog is not None:
        return SegYReader3D(fh, textual_reel_header, binary_reel_header, extended_textual_header,
                            trace_offset_catalog, trace_length_catalog, line_catalog, encoding, endian)

    return SegYReader(fh, textual_reel_header, binary_reel_header, extended_textual_header,
                      trace_offset_catalog, trace_length_catalog, encoding, endian)
def create_reader(fh, encoding=None, trace_header_format=TraceHeaderRev1, endian='>', progress=None):
    """Scan SEG Y data and build the most capable SegYReader for it.

    This is the preferred way to obtain a SegYReader. The reel headers
    are read, the traces are catalogued, and the catalogs that could be
    built decide which reader class is instantiated.

    Args:
        fh: A file-like-object open in binary mode positioned such that the
            beginning of the reel header will be the next byte to be read.
            For disk-based SEG Y files, this is the beginning of the file.

        encoding: An optional text encoding for the textual headers. If
            None (the default) a heuristic guesses the header encoding,
            and ASCII is used when the heuristic gives no answer.

        trace_header_format: The class defining the layout of the trace
            header. Defaults to TraceHeaderRev1.

        endian: '>' for big-endian data (the standard and default), '<'
            for little-endian (non-standard)

        progress: A unary callable which will be passed a number
            between zero and one indicating the progress made. It is
            forwarded unchanged to catalog_traces().

    Raises:
        TypeError: ``fh`` is open in text mode or is not seekable.

        ValueError: ``fh`` is closed, the file is shorter than a reel
            header, or ``endian`` is neither '<' nor '>'.

    Returns:
        A SegYReader2D when only a CDP catalog could be built, a
        SegYReader3D when only a line catalog could be built, and a
        plain SegYReader otherwise. The underlying file-like object must
        remain open for as long as the returned reader is in use, and
        closing it is the caller's responsibility.

    Example:

        with open('my_seismic_data.sgy', 'rb') as fh:
            reader = create_reader(fh)
            print(reader.num_traces())
    """
    # --- Validate the file handle and the arguments ------------------------
    if getattr(fh, 'encoding', None) is not None:
        raise TypeError(
            "SegYReader must be provided with a binary mode file object")

    if not fh.seekable():
        raise TypeError(
            "SegYReader must be provided with a seekable file object")

    if fh.closed:
        raise ValueError(
            "SegYReader must be provided with an open file object")

    total_bytes = file_length(fh)
    if total_bytes < REEL_HEADER_NUM_BYTES:
        message = "SEG Y file {!r} of {} bytes is too short".format(
            filename_from_handle(fh), total_bytes)
        raise ValueError(message)

    if endian not in ('<', '>'):
        raise ValueError("Unrecognised endian value {!r}".format(endian))

    # --- Settle the textual header encoding --------------------------------
    if encoding is None:
        encoding = guess_textual_header_encoding(fh)
        if encoding is None:
            encoding = ASCII

    # --- Read the reel headers and catalogue the traces --------------------
    textual_header = read_textual_reel_header(fh, encoding)
    binary_header = read_binary_reel_header(fh, endian)
    extended_header = read_extended_textual_headers(fh, binary_header, encoding)

    sample_width = bytes_per_sample(binary_header, extract_revision(binary_header))

    catalogs = catalog_traces(fh, sample_width, trace_header_format, endian, progress)
    offsets, lengths, cdp_catalog, line_catalog = catalogs

    # --- Choose the reader class from the catalogs that exist --------------
    leading_args = (fh, textual_header, binary_header, extended_header, offsets, lengths)
    trailing_args = (trace_header_format, encoding, endian)

    has_cdp = cdp_catalog is not None
    has_lines = line_catalog is not None

    if has_cdp and not has_lines:
        return SegYReader2D(*leading_args, cdp_catalog, *trailing_args)

    if has_lines and not has_cdp:
        return SegYReader3D(*leading_args, line_catalog, *trailing_args)

    return SegYReader(*leading_args, *trailing_args)
def catalog_fixed_length_traces(fh, binary_reel_header, trace_header_format=TraceHeaderRev1, endian='>', progress=None):
    """Build catalogs for a fixed length SEG Y file.

    Note:
        This function is faster than the full catalog, but has
        limitations. No CDP, or inline, xline catalogs are built, and it
        only works for SEG Y files with a fixed number of samples in each
        trace. No trace header is read: the trace count and positions are
        computed from the file length and the sample count in the binary
        reel header.

    Two catalogs will be built:

     1. A catalog mapping trace_samples index (0-based) to the position of
        that trace_samples header in the file.

     2. A catalog mapping trace_samples index (0-based) to the number of
        samples in that trace_samples.

    Args:
        fh: A file-like-object open in binary mode. Only its length (and,
            for error messages, its name) is used.

        binary_reel_header: The binary reel header of the file. Its
            ``num_samples`` attribute gives the samples per trace, and it
            is used to determine the revision and the bytes per sample.

        trace_header_format: Accepted for signature compatibility with
            catalog_traces(); not used by this function.

        endian: Accepted for signature compatibility with
            catalog_traces(); not used by this function.

        progress: A unary callable which will be passed a number
            between zero and one indicating the progress made. If
            provided, this callback will be invoked once with an
            argument equal to 1

    Raises:
        TypeError: ``progress`` is neither None nor callable.

        ValueError: The number of bytes after the reel header is not a
            whole multiple of the size of one fixed-length trace.

    Returns:
        A 4-tuple of the form (trace_samples-offset-catalog,
                               trace_samples-length-catalog,
                               None,
                               None)
        where each catalog is the result of ``CatalogBuilder.create()``,
        or None if no catalog could be built.
    """
    revision = extract_revision(binary_reel_header)
    bps = bytes_per_sample(binary_reel_header, revision)

    progress_callback = progress if progress is not None else lambda p: None

    if not callable(progress_callback):
        raise TypeError("catalog_fixed_length_traces(): progress callback must be callable")

    num_file_bytes = file_length(fh)
    num_samples = binary_reel_header.num_samples
    trace_num_bytes = TRACE_HEADER_NUM_BYTES + num_samples * bps

    # Exact integer arithmetic: any remainder means the file cannot consist
    # solely of traces of this fixed size.
    # NOTE(review): trace data is assumed to start immediately after
    # REEL_HEADER_NUM_BYTES; files with extended textual headers would be
    # offset further - confirm how callers handle that.
    num_traces, num_stray_bytes = divmod(num_file_bytes - REEL_HEADER_NUM_BYTES, trace_num_bytes)
    if num_stray_bytes != 0:
        raise ValueError(
            "SEG Y file {!r} of {} bytes is not consistent with a fixed trace length".format(
                filename_from_handle(fh),
                num_file_bytes))

    trace_offset_catalog_builder = CatalogBuilder()
    trace_length_catalog_builder = CatalogBuilder()

    for trace_index in range(num_traces):
        pos_begin = REEL_HEADER_NUM_BYTES + trace_num_bytes * trace_index
        trace_length_catalog_builder.add(trace_index, num_samples)
        trace_offset_catalog_builder.add(trace_index, pos_begin)

    trace_offset_catalog = trace_offset_catalog_builder.create()
    trace_length_catalog = trace_length_catalog_builder.create()

    progress_callback(1)

    return (trace_offset_catalog,
            trace_length_catalog,
            None,
            None)