Example #1
File: evt.py Project: mbyt/obspy
    def read(self, file_p, length, endian, param):
        """
        Read data from file_p.

        :param file_p: file pointer
        :param length: length to be read
        :param endian: endian type in datafile
        :type param: list
        :param param: sampling rate, sample size, block time, channels
        :rtype: list of list
        :return: list of data
        """
        buff = file_p.read(length)
        samplerate = param[0]
        numbyte = param[1]
        numchan = param[3]
        num = (samplerate // 10) * numbyte * numchan
        if length != num:
            raise EvtBadDataError("Bad data length")

        if numbyte == 2:
            data = from_buffer(buff, ">h").reshape((-1, numchan)).T
        elif numbyte == 4:
            data = from_buffer(buff, ">i").reshape((-1, numchan)).T
        elif numbyte == 3:
            data = np.empty((numchan, samplerate // 10))
            for j in range(samplerate // 10):
                for k in range(numchan):
                    i = (j * numchan) + k
                    val = unpack(b">i", buff[i * 3:(i * 3) + 3] + b'\0')[0] \
                        >> 8
                    data[k, j] = val

        return data
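The 3-byte branch above decodes big-endian signed 24-bit samples one value at a time with struct.unpack. The same decode can be vectorized with NumPy; a minimal sketch (not ObsPy code, names are illustrative):

import numpy as np

def decode_int24_be(buff, numchan):
    # View the raw bytes as 8-bit triples, one row per sample.
    raw = np.frombuffer(buff, dtype=np.uint8).astype(np.int32).reshape(-1, 3)
    # Combine the three big-endian bytes into one value per sample.
    val = (raw[:, 0] << 16) | (raw[:, 1] << 8) | raw[:, 2]
    # Sign-extend the 24-bit values.
    val = np.where(val & 0x800000, val - 0x1000000, val)
    # Channels are interleaved fastest, as in the loop above.
    return val.reshape(-1, numchan).T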
Example #2
def _is_win(filename, century="20"):  # @UnusedVariable
    """
    Checks whether a file is WIN or not.

    :type filename: str
    :param filename: WIN file to be checked.
    :rtype: bool
    :return: ``True`` if a WIN file.
    """
    # as long as we don't have a full format description, we just try to read
    # the file like _read_win does and check for errors
    century = "20"  # hardcoded ;(
    try:
        with open(filename, "rb") as fpin:
            fpin.read(4)
            buff = fpin.read(6)
            yy = "%s%02x" % (century, ord(buff[0:1]))
            mm = "%x" % ord(buff[1:2])
            dd = "%x" % ord(buff[2:3])
            hh = "%x" % ord(buff[3:4])
            mi = "%x" % ord(buff[4:5])
            sec = "%x" % ord(buff[5:6])

            # This will raise for invalid dates.
            UTCDateTime(int(yy), int(mm), int(dd), int(hh), int(mi), int(sec))
            buff = fpin.read(4)
            '%02x' % ord(buff[0:1])
            '%02x' % ord(buff[1:2])
            int('%x' % (ord(buff[2:3]) >> 4))
            ord(buff[3:4])
            idata00 = fpin.read(4)
            from_buffer(idata00, native_str('>i'))[0]
    except Exception:
        return False
    return True
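The "%x" % ord(...) idiom above decodes BCD (binary-coded decimal) date bytes: each nibble stores one decimal digit, so formatting the byte as hexadecimal recovers the decimal text. A tiny illustration with a hypothetical byte:

# The byte 0x23 encodes the decimal number 23 in BCD.
buff = b"\x23"
print("%x" % ord(buff[0:1]))       # -> '23'
print(int("%x" % ord(buff[0:1])))  # -> 23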
Example #3
def _read_pdas(filename, **kwargs):
    """
    Reads a PDAS file and returns an ObsPy Stream object.

    .. warning::
        This function should NOT be called directly, it registers via the
        ObsPy :func:`~obspy.core.stream.read` function, call this instead.

    :type filename: str
    :param filename: PDAS file to be read.
    :rtype: :class:`~obspy.core.stream.Stream`
    :return: An ObsPy Stream object.

    .. rubric:: Example

    >>> from obspy import read
    >>> st = read("/path/to/p1246001.108")
    >>> st  # doctest: +ELLIPSIS
    <obspy.core.stream.Stream object at 0x...>
    >>> print(st)  # doctest: +ELLIPSIS
    1 Trace(s) in Stream:
    ... | 1994-04-18T00:00:00.000000Z - ... | 200.0 Hz, 500 samples
    """
    extra_headers = {}
    with open(filename, "rb") as fh:
        items = [fh.readline().split() for i_ in range(11)]
        data = fh.read()
    for i_ in (0, 1, 2, 3, 7, 8, 9):
        extra_headers[items[i_][0].decode()] = items[i_][1].decode()
    month, day, year = items[4][1].decode().split("-")
    if UTCDateTime().year > 2050:
        raise NotImplementedError()
    if len(year) == 2:
        if int(year) < 50:
            year = "20" + year
        else:
            year = "19" + year
    time = items[5][1].decode()
    t = UTCDateTime("%s-%s-%sT%s" % (year, month, day, time))
    sampling_rate = 1.0 / float(items[6][1].decode())
    dtype = items[1][1].decode()
    if dtype.upper() == "LONG":
        data = from_buffer(data, dtype=np.int16)
    elif dtype.upper() == "SHORT":
        data = from_buffer(data, dtype=np.int8)
    else:
        raise NotImplementedError()

    tr = Trace(data=data)
    tr.stats.starttime = t
    tr.stats.sampling_rate = sampling_rate
    tr.stats._format = "PDAS"
    tr.stats.pdas = extra_headers
    st = Stream(traces=[tr])
    return st
Example #4
def _initial_unpack_packets(bytestring):
    """
    First unpack the data with a dtype matching the itemsize of storage in the
    reftek file, then allocate the result array with dtypes for storage of
    Python objects/arrays and fill it with the unpacked data.
    """
    if not len(bytestring):
        return np.array([], dtype=PACKET_FINAL_DTYPE)

    if len(bytestring) % 1024 != 0:
        tail = len(bytestring) % 1024
        bytestring = bytestring[:-tail]
        msg = ("Length of data not a multiple of 1024. Data might be "
               "truncated. Dropping {:d} byte(s) at the end.").format(tail)
        warnings.warn(msg)
    data = from_buffer(
        bytestring, dtype=PACKET_INITIAL_UNPACK_DTYPE)
    result = np.empty_like(data, dtype=PACKET_FINAL_DTYPE)

    for name, dtype_initial, converter, dtype_final in PACKET:
        if converter is None:
            result[name][:] = data[name][:]
        else:
            try:
                result[name][:] = converter(data[name])
            except Exception as e:
                raise Reftek130UnpackPacketError(str(e))
    # Time unpacking is special and needs some additional work:
    # we need to add the timestamp of the start of the respective year (in
    # nanoseconds) to the already unpacked time within that year.
    result['time'][:] += [_get_nanoseconds_for_start_of_year(y)
                          for y in result['year']]
    return result
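The function above is a two-pass decode: from_buffer first maps the raw bytes onto a structured dtype whose field sizes match the on-disk layout, then each field is converted into a second array with friendlier dtypes. A self-contained sketch of the same pattern with a made-up two-field record (the names and layout are illustrative, not the actual REFTEK packet definition):

import numpy as np

raw_dtype = np.dtype([('id', '>u2'), ('count', '>u2')])    # on-disk layout
final_dtype = np.dtype([('id', 'u2'), ('count', 'i8')])    # working layout

buf = b'\x00\x01\x00\x0a' * 3                              # three records
data = np.frombuffer(buf, dtype=raw_dtype)
result = np.empty_like(data, dtype=final_dtype)
for name in raw_dtype.names:
    # A converter function could be applied per field here.
    result[name][:] = data[name]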
Example #5
    def get_waveforms_for_event(self, event_id):
        wf_ids = self._events[event_id]["waveform_ids"]
        _t = self._dataframes["wfdisc"]

        st = obspy.Stream()

        for wf in wf_ids:
            wf = _t[_t.id == wf].iloc[0]

            with io.open(wf.filename, "rb") as fh:
                data = fh.read(4 * wf.npts)

            data = from_buffer(data, dtype=np.float32)
            # Data is big-endian - we just want to work with little endian.
            data.byteswap(True)

            tr = obspy.Trace(data=data)
            tr.stats.network = "LL"
            tr.stats.station = wf.station
            tr.stats.sampling_rate = wf.sampling_rate
            tr.stats.starttime = wf.starttime
            tr.stats.channel = wf.channel.upper()
            tr.stats.calib = wf.calib

            st.append(tr)

        return st
Example #6
def _get_ms_file_info(f, real_name):
    """
    Takes a Mini-SEED filename as an argument and returns a dictionary
    with some basic information about the file. Also suitable for Full
    SEED.

    This is an exact copy of a method of the same name in utils. Due to
    circular imports this method cannot be imported from utils.
    XXX: Figure out a better way!

    :param f: File pointer of opened file in binary format
    :param real_name: Realname of the file, needed for calculating size
    """
    # get size of file
    info = {'filesize': os.path.getsize(real_name)}
    pos = f.tell()
    f.seek(0)
    rec_buffer = from_buffer(f.read(512), dtype=np.int8)
    info['record_length'] = clibmseed.ms_detect(rec_buffer, 512)
    # Calculate Number of Records
    info['number_of_records'] = int(info['filesize'] //
                                    info['record_length'])
    info['excess_bytes'] = info['filesize'] % info['record_length']
    f.seek(pos)
    return info
Example #7
def _read_css(filename, **kwargs):
    """
    Reads a CSS waveform file and returns a Stream object.

    .. warning::
        This function should NOT be called directly, it registers via the
        ObsPy :func:`~obspy.core.stream.read` function, call this instead.

    :type filename: str
    :param filename: CSS file to be read.
    :rtype: :class:`~obspy.core.stream.Stream`
    :returns: Stream with Traces specified by given file.
    """
    # read metafile with info on single traces
    with open(filename, "rb") as fh:
        lines = fh.readlines()
    basedir = Path(filename).parent
    traces = []
    # read single traces
    for line in lines:
        npts = int(line[79:87])
        dirname = line[148:212].strip().decode()
        dfilename = Path(basedir) / dirname / line[213:245].strip().decode()
        offset = int(line[246:256])
        dtype = DTYPE[line[143:145]]
        if isinstance(dtype, tuple):
            read_fmt = np.dtype(dtype[0])
            fmt = dtype[1]
        else:
            read_fmt = np.dtype(dtype)
            fmt = read_fmt

        try:
            # assume the waveform file is not compressed
            fh = open(dfilename, "rb")
        except FileNotFoundError as e:
            # If the waveform file referenced in the wfdisc is not found,
            # try to open a compressed file with a .gz suffix instead.
            try:
                fh = gzip.open(str(dfilename) + '.gz', "rb")
            except FileNotFoundError:
                raise e

        # Read one segment of binary data
        fh.seek(offset)
        data = fh.read(read_fmt.itemsize * npts)
        fh.close()
        data = from_buffer(data, dtype=read_fmt)
        data = np.require(data, dtype=fmt)

        header = {}
        header['station'] = line[0:6].strip().decode()
        header['channel'] = line[7:15].strip().decode()
        header['starttime'] = UTCDateTime(float(line[16:33]))
        header['sampling_rate'] = float(line[88:99])
        header['calib'] = float(line[100:116])
        header['calper'] = float(line[117:133])
        tr = Trace(data, header=header)
        traces.append(tr)
    return Stream(traces=traces)
Example #8
    def test_bug_write_read_float32_seed_win32(self):
        """
        Test case for issue #64.
        """
        # create stream object
        data = np.array([395.07809448, 395.0782, 1060.28112793,
                         -1157.37487793, -1236.56237793, 355.07028198,
                         -1181.42175293], dtype=np.float32)
        st = Stream([Trace(data=data)])
        with NamedTemporaryFile() as tf:
            tempfile = tf.name
            _write_mseed(st, tempfile, format="MSEED")
            # read temp file directly without libmseed
            with open(tempfile, 'rb') as fp:
                fp.seek(56)
                dtype = np.dtype(native_str('>f4'))
                bin_data = from_buffer(fp.read(7 * dtype.itemsize),
                                       dtype=dtype)
            np.testing.assert_array_equal(data, bin_data)
            # read via ObsPy
            st2 = _read_mseed(tempfile)
        # test results
        np.testing.assert_array_equal(data, st2[0].data)
Example #9
def readdata(fid, n, t):
    target = types[t]
    # avoid passing np.intXX down to SpooledTemporaryFile.read() since it
    # errors out on numpy integer types on at least Python 3.6, seems fixed in
    # Python 3.7
    # see https://ci.appveyor.com/project/obspy/obspy/
    #                  builds/29252080/job/9gr8bqkgr005523n#L742
    data = fid.read(int(np.dtype(target).itemsize * n))
    return from_buffer(data, target)
Example #10
def unpack_4byte_integer(file, count, endian='>'):
    """
    Unpacks 4 byte integers.
    """
    # Read as 4 byte integer so bit shifting works.
    data = from_buffer(file.read(count * 4), dtype=np.int32)
    # Swap the byte order if necessary.
    if BYTEORDER != endian:
        data = data.byteswap()
    return data
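An alternative to reading in native order and swapping afterwards is to encode the byte order in the dtype itself (assuming from_buffer behaves like np.frombuffer). A sketch:

import numpy as np

def unpack_4byte_integer_alt(file, count, endian='>'):
    # The dtype carries the byte order, so no byteswap() is needed.
    return np.frombuffer(file.read(count * 4), dtype=endian + 'i4')

Note that np.frombuffer returns a read-only view of the bytes; call .copy() on the result if the samples will be modified in place.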
Example #11
def unpack_2byte_integer(file, count, endian='>'):
    """
    Unpacks 2 byte integers.
    """
    # Read as 2 byte integers.
    data = from_buffer(file.read(count * 2), dtype=np.int16)
    # Swap the byte order if necessary.
    if BYTEORDER != endian:
        data = data.byteswap()
    return data
Example #12
def unpack_4byte_ieee(file, count, endian='>'):
    """
    Unpacks 4 byte IEEE floating points.
    """
    # Read as 4 byte IEEE floats.
    data = from_buffer(file.read(count * 4), dtype=np.float32)
    # Swap the byte order if necessary.
    if BYTEORDER != endian:
        data = data.byteswap()
    return data
Example #13
    def test_read_and_write_segy(self, headonly=False):
        """
        Reading and writing again should not change a file.
        """
        for file, attribs in self.files.items():
            file = os.path.join(self.path, file)
            non_normalized_samples = attribs['non_normalized_samples']
            # Read the file.
            with open(file, 'rb') as f:
                org_data = f.read()
            segy_file = _read_segy(file, headonly=headonly)
            with NamedTemporaryFile() as tf:
                out_file = tf.name
                with warnings.catch_warnings(record=True):
                    segy_file.write(out_file)
                # Read the new file again.
                with open(out_file, 'rb') as f:
                    new_data = f.read()
            # The two files should have the same length.
            self.assertEqual(len(org_data), len(new_data))
            # Replace the non-normalized samples. They are already tested in
            # test_packSEGYData and therefore not tested again here.
            if len(non_normalized_samples) != 0:
                # Convert to 4 byte integers. Any 4 byte numbers work.
                org_data = from_buffer(org_data, np.int32)
                new_data = from_buffer(new_data, np.int32)
                # Skip the header (4*960 bytes) and replace the non-normalized
                # data samples.
                org_data[960:][non_normalized_samples] = \
                    new_data[960:][non_normalized_samples]
                # Create byte strings again.
                org_data = org_data.tobytes()
                new_data = new_data.tobytes()
            # Just patch both headers - this tests something different.
            org_data = _patch_header(org_data)
            new_data = _patch_header(new_data)
            # Always write the SEGY File revision number!
            # org_data[3500:3502] = new_data[3500:3502]
            # Test the identity without the SEGY revision number.
            self.assertEqual(org_data[:3500], new_data[:3500])
            self.assertEqual(org_data[3502:], new_data[3502:])
Example #14
    def parse_data(self, dat):
        """
        Parse tracebuf char array data into self.data
        """
        self.data = from_buffer(dat, self.input_type)
        ndat = len(self.data)
        if self.ndata != ndat:
            msg = 'data count in header (%d) != data count (%d)'
            print(msg % (self.nsamp, ndat), file=sys.stderr)
            self.ndata = ndat
        return
Example #15
    def get_ms_record(self):
        # following from obspy.io.mseed.tests.test_libmseed.py -> test_msrParse
        msr = clibmseed.msr_init(None)
        pyobj = from_buffer(self.msrecord, dtype=np.int8)
        errcode = \
            clibmseed.msr_parse(pyobj, len(pyobj), C.pointer(msr), -1, 1, 1)
        if errcode != 0:
            msg = "failed to decode mini-seed record: msr_parse errcode: %s"
            raise SeedLinkException(msg % (errcode))
        # print "DEBUG: msr:", msr
        msrecord_py = msr.contents
        # print "DEBUG: msrecord_py:", msrecord_py
        return msr, msrecord_py
Example #16
def unpack_4byte_ibm(file, count, endian='>'):
    """
    Unpacks 4 byte IBM floating points.
    """
    # Read the raw 4 byte words as float32; the C routine below converts
    # the IBM bit patterns to IEEE in place.
    data = from_buffer(file.read(count * 4), dtype=np.float32)
    # Swap the byte order if necessary.
    if BYTEORDER != endian:
        data = data.byteswap()
    length = len(data)
    # Call the C code which transforms the data inplace.
    clibsegy.ibm2ieee(data, length)
    return data
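IBM single-precision floats consist of a sign bit, a 7-bit base-16 exponent biased by 64, and a 24-bit fraction. The C routine ibm2ieee above is the authoritative converter; as an illustration only, the same arithmetic spelled out in NumPy:

import numpy as np

def ibm2ieee_py(bits):
    # bits: np.uint32 array holding the raw IBM words.
    sign = np.where(bits >> 31, -1.0, 1.0)
    exponent = ((bits >> 24) & 0x7f).astype(np.int32) - 64
    fraction = (bits & 0x00ffffff) / float(1 << 24)
    return sign * fraction * 16.0 ** exponent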
Example #17
    def _readline(fh, version=version, dtype=dtype):
        if version >= 7:
            # On Sun, Linux, MacOSX and PC from version 7.0 (using Digital
            # Fortran), every write is preceded and terminated with 4
            # additional bytes giving the number of bytes in the write.
            # On 64 bit systems, 8 bytes are used to define the number of
            # bytes written.
            start_bytes = fh.read(dtype.itemsize)
            # convert to int32/int64
            length = from_buffer(start_bytes, dtype=dtype)[0]
            data = fh.read(length)
            end_bytes = fh.read(dtype.itemsize)
            assert start_bytes == end_bytes
            return data
        else:  # version <= 6
            # Every write is preceded and terminated with one byte giving the
            # number of bytes in the write. If the write contains more than 128
            # bytes, it is blocked in records of 128 bytes, each with the start
            # and end byte which in this case is the number 128. Each record is
            # thus 130 bytes long.
            data = b''
            while True:
                start_byte = fh.read(1)
                if not start_byte:
                    # end of file
                    break
                # convert to unsigned int8
                length = from_buffer(start_byte, np.uint8)[0]
                data += fh.read(length)
                end_byte = fh.read(1)
                assert start_byte == end_byte
                if length == 128:
                    # blocked data - repeat loop
                    continue
                # end of blocked data
                break
            return data
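Both branches parse Fortran unformatted sequential records, where every write is framed by a leading and a trailing byte count. For producing test fixtures, a sketch of the matching writer (4/8-byte-marker variant; the helper name is hypothetical):

import struct

def write_fortran_record(fh, payload, marker_fmt='>i'):
    # Frame the payload with identical leading and trailing byte counts,
    # mirroring what _readline expects for version >= 7.
    marker = struct.pack(marker_fmt, len(payload))
    fh.write(marker + payload + marker)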
Example #18
def readstructtag(fid):
    y = AttribDict()
    # avoid passing np.intXX down to SpooledTemporaryFile.read() since it
    # errors out on numpy integer types on at least Python 3.6, seems fixed in
    # Python 3.7
    # see https://ci.appveyor.com/project/obspy/obspy/
    #                  builds/29252080/job/9gr8bqkgr005523n#L742
    data = fid.read(int(structtag_dtypes.itemsize))
    data = from_buffer(data, structtag_dtypes)
    for (key, (fmt, size)) in structtag_dtypes.fields.items():
        if str(fmt).count("S") != 0:
            y[key] = data[key][0].decode('UTF-8')
        else:
            y[key] = data[key][0]
    return y
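structtag_dtypes (defined elsewhere in the module) is a structured NumPy dtype; dtype.fields maps each field name to its sub-dtype and byte offset, which is what the loop above iterates over. A self-contained sketch of the same decode pattern with a hypothetical two-field layout:

import io
import numpy as np

tag_dtypes = np.dtype([('magic', 'S4'), ('version', '<i4')])  # illustrative

def read_struct(fid):
    data = np.frombuffer(fid.read(tag_dtypes.itemsize), tag_dtypes)
    out = {}
    for key, (fmt, offset) in tag_dtypes.fields.items():
        value = data[key][0]
        out[key] = value.decode('UTF-8') if fmt.kind == 'S' else value
    return out

read_struct(io.BytesIO(b'DATA\x01\x00\x00\x00'))  # {'magic': 'DATA', 'version': 1}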
Example #19
def _read_nnsa_kb_core(filename, **kwargs):
    """
    Reads a NNSA KB Core waveform file and returns a Stream object.

    .. warning::
        This function should NOT be called directly, it registers via the
        ObsPy :func:`~obspy.core.stream.read` function, call this instead.

    :type filename: str
    :param filename: NNSA KB Core file to be read.
    :rtype: :class:`~obspy.core.stream.Stream`
    :returns: Stream with Traces specified by given file.
    """
    # read metafile with info on single traces
    with open(filename, "rb") as fh:
        lines = fh.readlines()
    basedir = Path(filename).parent
    traces = []
    # read single traces
    for line in lines:
        npts = int(line[80:88])
        dirname = line[149:213].strip().decode()
        filename = Path(basedir) / dirname / \
            line[214:246].strip().decode()

        offset = int(line[247:257])
        dtype = DTYPE[line[144:146]]
        if isinstance(dtype, tuple):
            read_fmt = np.dtype(dtype[0])
            fmt = dtype[1]
        else:
            read_fmt = np.dtype(dtype)
            fmt = read_fmt
        with open(filename, "rb") as fh:
            fh.seek(offset)
            data = fh.read(read_fmt.itemsize * npts)
            data = from_buffer(data, dtype=read_fmt)
            data = np.require(data, dtype=fmt)
        header = {}
        header['station'] = line[0:6].strip().decode()
        header['channel'] = line[7:15].strip().decode()
        header['starttime'] = UTCDateTime(float(line[16:33]))
        header['sampling_rate'] = float(line[89:100])
        header['calib'] = float(line[101:117])
        header['calper'] = float(line[118:134])
        tr = Trace(data, header=header)
        traces.append(tr)
    return Stream(traces=traces)
Example #20
def _read_css(filename, **kwargs):
    """
    Reads a CSS waveform file and returns a Stream object.

    .. warning::
        This function should NOT be called directly, it registers via the
        ObsPy :func:`~obspy.core.stream.read` function, call this instead.

    :type filename: str
    :param filename: CSS file to be read.
    :rtype: :class:`~obspy.core.stream.Stream`
    :returns: Stream with Traces specified by given file.
    """
    # read metafile with info on single traces
    with open(filename, "rb") as fh:
        lines = fh.readlines()
    basedir = os.path.dirname(filename)
    traces = []
    # read single traces
    for line in lines:
        npts = int(line[79:87])
        dirname = line[148:212].strip().decode()
        filename = line[213:245].strip().decode()
        filename = os.path.join(basedir, dirname, filename)
        offset = int(line[246:256])
        dtype = DTYPE[line[143:145]]
        if isinstance(dtype, tuple):
            read_fmt = np.dtype(dtype[0])
            fmt = dtype[1]
        else:
            read_fmt = np.dtype(dtype)
            fmt = read_fmt
        with open(filename, "rb") as fh:
            fh.seek(offset)
            data = fh.read(read_fmt.itemsize * npts)
            data = from_buffer(data, dtype=read_fmt)
            data = np.require(data, dtype=fmt)
        header = {}
        header['station'] = line[0:6].strip().decode()
        header['channel'] = line[7:15].strip().decode()
        header['starttime'] = UTCDateTime(float(line[16:33]))
        header['sampling_rate'] = float(line[88:99])
        header['calib'] = float(line[100:116])
        header['calper'] = float(line[117:133])
        tr = Trace(data, header=header)
        traces.append(tr)
    return Stream(traces=traces)
Example #21
def _read_wav(filename, headonly=False, **kwargs):  # @UnusedVariable
    """
    Reads an audio WAV file and returns an ObsPy Stream object.

    Currently supports uncompressed unsigned char and short integer and
    integer data values. This should cover most WAV files.

    .. warning::
        This function should NOT be called directly, it registers via the
        ObsPy :func:`~obspy.core.stream.read` function, call this instead.

    :type filename: str
    :param filename: Audio WAV file to be read.
    :rtype: :class:`~obspy.core.stream.Stream`
    :return: An ObsPy Stream object.

    .. rubric:: Example

    >>> from obspy import read
    >>> st = read("/path/to/3cssan.near.8.1.RNON.wav")
    >>> print(st) #doctest: +NORMALIZE_WHITESPACE
    1 Trace(s) in Stream:
    ... | 1970-01-01T00:00:00.000000Z - 1970-01-01T00:00:00.371143Z
    | 7000.0 Hz, 2599 samples
    """
    # read WAV file
    fh = wave.open(filename, 'rb')
    try:
        # header information
        (_nchannel, width, rate, length, _comptype, _compname) = fh.getparams()
        header = {'sampling_rate': rate, 'npts': length}
        if headonly:
            return Stream([Trace(header=header)])
        if width not in WIDTH2DTYPE.keys():
            msg = "Unsupported Format Type, word width %dbytes" % width
            raise TypeError(msg)
        data = from_buffer(fh.readframes(length), dtype=WIDTH2DTYPE[width])
    finally:
        fh.close()
    return Stream([Trace(header=header, data=data)])
Example #22
    def test_pack_segy_data(self):
        """
        Tests the packing of various SEG Y files.
        """
        # Loop over all files.
        for file, attribs in self.files.items():
            # Get some attributes.
            data_format = attribs['data_sample_enc']
            endian = attribs['endian']
            count = attribs['sample_count']
            size = attribs['sample_size']
            non_normalized_samples = attribs['non_normalized_samples']
            dtype = self.dtypes[data_format]
            file = os.path.join(self.path, file)
            # Load the data. This data has previously been unpacked by
            # Madagascar.
            data = np.load(file + '.npy').ravel()
            data = np.require(data, dtype)
            # Load the packed data.
            with open(file, 'rb') as f:
                # Jump to the beginning of the data.
                f.seek(3200 + 400 + 240)
                packed_data = f.read(count * size)
            # The pack functions all write to file objects.
            f = io.BytesIO()
            # Pack the data.
            DATA_SAMPLE_FORMAT_PACK_FUNCTIONS[data_format](f, data, endian)
            # Read again.
            f.seek(0, 0)
            new_packed_data = f.read()
            # Check the length.
            self.assertEqual(len(packed_data), len(new_packed_data))
            if len(non_normalized_samples) == 0:
                # The packed data should be totally identical.
                self.assertEqual(packed_data, new_packed_data)
            else:
                # Some test files contain non normalized IBM floating point
                # data. These cannot be reproduced exactly.
                # Just a sanity check to be sure it is only IBM floating point
                # data that does not work completely.
                self.assertEqual(data_format, 1)

                # Read the data as uint8 to be able to directly access the
                # different bytes.
                # Original data.
                packed_data = from_buffer(packed_data, np.uint8)
                # Newly written.
                new_packed_data = from_buffer(new_packed_data, np.uint8)

                # Figure out the non normalized fractions in the original data
                # because these cannot be compared directly.
                # Get the position of the first byte of the fraction depending
                # on the endianness.
                if endian == '>':
                    start = 1
                else:
                    start = 2
                # The first byte of the fraction.
                first_fraction_byte_old = packed_data[start::4]
                # First get all zeros in the original data because zeros have
                # to be treated differently.
                zeros = np.where(data == 0)[0]
                # Create a copy and set the zeros to a high number to be able
                # to find all non normalized numbers.
                fraction_copy = first_fraction_byte_old.copy()
                fraction_copy[zeros] = 255
                # Normalized numbers will have no zeros in the first 4 bit of
                # the fraction. This means that the most significant byte of
                # the fraction has to be at least 16 for it to be normalized.
                non_normalized = np.where(fraction_copy < 16)[0]

                # Sanity check if the file data and the calculated data are the
                # same.
                np.testing.assert_array_equal(non_normalized,
                                              np.array(non_normalized_samples))

                # Test all other parts of the packed data. Set dtype to int32
                # to get 4 byte numbers.
                packed_data_copy = packed_data.copy()
                new_packed_data_copy = new_packed_data.copy()
                packed_data_copy.dtype = np.int32
                new_packed_data_copy.dtype = np.int32
                # Equalize the non normalized parts.
                packed_data_copy[non_normalized] = \
                    new_packed_data_copy[non_normalized]
                np.testing.assert_array_equal(packed_data_copy,
                                              new_packed_data_copy)

                # Now check the non normalized parts if they are almost the
                # same.
                data = data[non_normalized]
                # Unpack the data again.
                new_packed_data.dtype = np.int32
                new_packed_data = new_packed_data[non_normalized]
                length = len(new_packed_data)
                f = io.BytesIO()
                f.write(new_packed_data.tobytes())
                f.seek(0, 0)
                new_data = DATA_SAMPLE_FORMAT_UNPACK_FUNCTIONS[1](
                    f, length, endian)
                f.close()
                packed_data.dtype = np.int32
                packed_data = packed_data[non_normalized]
                length = len(packed_data)
                f = io.BytesIO()
                f.write(packed_data.tobytes())
                f.seek(0, 0)
                old_data = DATA_SAMPLE_FORMAT_UNPACK_FUNCTIONS[1](
                    f, length, endian)
                f.close()
                # This works because the normalized and the non normalized IBM
                # floating point numbers will be close enough for the internal
                # IEEE representation to be identical.
                np.testing.assert_array_equal(data, new_data)
                np.testing.assert_array_equal(data, old_data)
Example #23
def read_sac(source, headonly=False, byteorder=None, checksize=False):
    """
    Read a SAC binary file.

    :param source: Full path string or file-like object for a SAC binary file
        on disk. If it is an open file object, it must be opened in 'rb' mode.
    :type source: str or file
    :param headonly: If headonly is True, only read the header arrays not the
        data array.
    :type headonly: bool
    :param byteorder: If omitted or None, automatic byte-order checking is
        done, starting with native order. If byteorder is specified and
        incorrect, a SacIOError is raised.
    :type byteorder: str {'little', 'big'}, optional
    :param checksize: If True, check that the theoretical file size from the
        header matches the size on disk.
    :type checksize: bool

    :return: The float, integer, and string header arrays, and data array,
        in that order. Data array will be None if headonly is True.
    :rtype: tuple of :class:`numpy.ndarray`

    :raises: :class:`ValueError` if unrecognized byte order.  :class:`IOError`
        if file not found, incorrect specified byteorder, theoretical file size
        doesn't match header, or header arrays are incorrect length.

    """
    # TODO: rewrite using "with" statement instead of open/close management.
    # check byte order, header array length, file size, npts == data length
    try:
        f = open(source, 'rb')
        is_file_name = True
    except TypeError:
        # source is already a file-like object
        f = source
        is_file_name = False

    is_byteorder_specified = byteorder is not None
    if not is_byteorder_specified:
        byteorder = sys.byteorder

    if byteorder == 'little':
        endian_str = '<'
    elif byteorder == 'big':
        endian_str = '>'
    else:
        raise ValueError("Unrecognized byteorder. Use {'little', 'big'}")

    # --------------------------------------------------------------
    # READ HEADER
    # The sac header has 70 floats, 40 integers, then 192 bytes
    #    in strings. Store them in array (and convert the char to a
    #    list). That's a total of 632 bytes.
    # --------------------------------------------------------------
    hf = from_buffer(f.read(4 * 70), dtype=endian_str + 'f4')
    hi = from_buffer(f.read(4 * 40), dtype=endian_str + 'i4')
    hs = from_buffer(f.read(24 * 8), dtype='|S8')

    if not is_valid_byteorder(hi):
        if is_byteorder_specified:
            if is_file_name:
                f.close()
            # specified but not valid. you dun messed up.
            raise SacIOError("Incorrect byteorder {}".format(byteorder))
        else:
            # not valid, but not specified.
            # swap the dtype interpretation (dtype.byteorder), but keep the
            # bytes, so the arrays in memory reflect the bytes on disk
            hf = hf.newbyteorder('S')
            hi = hi.newbyteorder('S')

    # we now have correct headers, let's use their correct byte order.
    endian_str = hi.dtype.byteorder

    # check header lengths
    if len(hf) != 70 or len(hi) != 40 or len(hs) != 24:
        hf = hi = hs = None
        if is_file_name:
            f.close()
        raise SacIOError("Cannot read all header values")

    npts = hi[HD.INTHDRS.index('npts')]

    # check file size
    if checksize:
        cur_pos = f.tell()
        f.seek(0, os.SEEK_END)
        length = f.tell()
        f.seek(cur_pos, os.SEEK_SET)
        th_length = (632 + 4 * int(npts))
        if length != th_length:
            if is_file_name:
                f.close()
            msg = "Actual and theoretical file size are inconsistent.\n" \
                  "Actual/Theoretical: {}/{}\n" \
                  "Check that headers are consistent with time series."
            raise SacIOError(msg.format(length, th_length))

    # --------------------------------------------------------------
    # READ DATA
    # --------------------------------------------------------------
    if headonly:
        data = None
    else:
        data = from_buffer(f.read(int(npts) * 4), dtype=endian_str + 'f4')

        if len(data) != npts:
            if is_file_name:
                f.close()
            raise SacIOError("Cannot read all data points")

    if is_file_name:
        f.close()

    return hf, hi, hs, data
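is_valid_byteorder (defined elsewhere in ObsPy) decides whether the integer header makes sense under the attempted byte order. A common heuristic, shown here as an assumption rather than ObsPy's exact test, is that the SAC header version nvhdr must decode to a small positive integer; under the wrong byte order it becomes a huge number:

def is_valid_byteorder_sketch(hi, nvhdr_index=6):
    # nvhdr is the 7th value of the SAC integer header block.
    nvhdr = int(hi[nvhdr_index])
    return 0 < nvhdr < 20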
Example #24
    def parse_next_trace(self):
        """
        Parse the next trace in the trace pointer list and return a Trace
        object.
        """
        trace_descriptor_block = self.file_pointer.read(32)
        # Check if the trace descriptor block id is valid.
        if unpack(self.endian + b'H', trace_descriptor_block[0:2])[0] != \
           0x4422:
            msg = 'Invalid trace descriptor block id.'
            raise SEG2InvalidFileError(msg)
        size_of_this_block = unpack(self.endian + b'H',
                                    trace_descriptor_block[2:4])[0]
        number_of_samples_in_data_block = \
            unpack(self.endian + b'L', trace_descriptor_block[8:12])[0]
        data_format_code = unpack(b'B', trace_descriptor_block[12:13])[0]

        # Parse the data format code.
        if data_format_code == 4:
            dtype = np.float32
            sample_size = 4
        elif data_format_code == 5:
            dtype = np.float64
            sample_size = 8
        elif data_format_code == 1:
            dtype = np.int16
            sample_size = 2
        elif data_format_code == 2:
            dtype = np.int32
            sample_size = 4
        elif data_format_code == 3:
            msg = ('\nData format code 3 (20-bit SEG-D floating point) not '
                   'supported yet.\nPlease contact the ObsPy developers with '
                   'a sample file.')
            raise NotImplementedError(msg)
        else:
            msg = 'Unrecognized data format code'
            raise SEG2InvalidFileError(msg)

        # The rest of the trace block is free form.
        header = {}
        header['seg2'] = AttribDict()
        self.parse_free_form(self.file_pointer.read(size_of_this_block - 32),
                             header['seg2'])
        header['delta'] = float(header['seg2']['SAMPLE_INTERVAL'])
        # Set to the file's start time.
        header['starttime'] = deepcopy(self.starttime)
        if 'DELAY' in header['seg2']:
            if float(header['seg2']['DELAY']) != 0:
                msg = "Non-zero value found in Trace's 'DELAY' field. " + \
                      "This is not supported/tested yet and might lead " + \
                      "to a wrong starttime of the Trace. Please contact " + \
                      "the ObsPy developers with a sample file."
                warnings.warn(msg)

        if "DESCALING_FACTOR" in header["seg2"]:
            header['calib'] = float(header['seg2']['DESCALING_FACTOR'])

        # Unpack the data.
        data = from_buffer(
            self.file_pointer.read(number_of_samples_in_data_block *
                                   sample_size),
            dtype=dtype)
        # Integrate SEG2 file header into each trace header
        tmp = self.stream.stats.seg2.copy()
        tmp.update(header['seg2'])
        header['seg2'] = tmp
        return Trace(data=data, header=header)
Example #25
def _read_seisan(filename, headonly=False, **kwargs):  # @UnusedVariable
    """
    Reads a SEISAN file and returns an ObsPy Stream object.

    .. warning::
        This function should NOT be called directly, it registers via the
        ObsPy :func:`~obspy.core.stream.read` function, call this instead.

    :type filename: str
    :param filename: SEISAN file to be read.
    :rtype: :class:`~obspy.core.stream.Stream`
    :return: An ObsPy Stream object.

    .. rubric:: Example

    >>> from obspy import read
    >>> st = read("/path/to/2001-01-13-1742-24S.KONO__004")
    >>> st  # doctest: +ELLIPSIS
    <obspy.core.stream.Stream object at 0x...>
    >>> print(st)  # doctest: +ELLIPSIS
    4 Trace(s) in Stream:
    .KONO.0.B0Z | 2001-01-13T17:45:01.999000Z - ... | 20.0 Hz, 6000 samples
    .KONO.0.L0Z | 2001-01-13T17:42:24.924000Z - ... | 1.0 Hz, 3542 samples
    .KONO.0.L0N | 2001-01-13T17:42:24.924000Z - ... | 1.0 Hz, 3542 samples
    .KONO.0.L0E | 2001-01-13T17:42:24.924000Z - ... | 1.0 Hz, 3542 samples
    """
    # get version info from event file header (at least 12*80 bytes)
    fh = open(filename, 'rb')
    data = fh.read(80 * 12)
    (byteorder, arch, version) = _get_version(data)
    dlen = arch // 8
    dtype = np.dtype(native_str(byteorder + 'i' + str(dlen)))
    stype = native_str('=i' + str(dlen))

    def _readline(fh, version=version, dtype=dtype):
        if version >= 7:
            # On Sun, Linux, MacOSX and PC from version 7.0 (using Digital
            # Fortran), every write is preceded and terminated with 4
            # additional bytes giving the number of bytes in the write.
            # On 64 bit systems, 8 bytes are used to define the number of
            # bytes written.
            start_bytes = fh.read(dtype.itemsize)
            # convert to int32/int64
            length = from_buffer(start_bytes, dtype=dtype)[0]
            data = fh.read(length)
            end_bytes = fh.read(dtype.itemsize)
            assert start_bytes == end_bytes
            return data
        else:  # version <= 6
            # Every write is preceded and terminated with one byte giving the
            # number of bytes in the write. If the write contains more than 128
            # bytes, it is blocked in records of 128 bytes, each with the start
            # and end byte which in this case is the number 128. Each record is
            # thus 130 bytes long.
            data = b''
            while True:
                start_byte = fh.read(1)
                if not start_byte:
                    # end of file
                    break
                # convert to unsigned int8
                length = from_buffer(start_byte, np.uint8)[0]
                data += fh.read(length)
                end_byte = fh.read(1)
                assert start_byte == end_byte
                if length == 128:
                    # blocked data - repeat loop
                    continue
                # end of blocked data
                break
            return data

    # reset file pointer
    if version >= 7:
        fh.seek(0)
    else:
        # version <= 6 starts with first byte K
        fh.seek(1)
    # event file header
    # line 1
    data = _readline(fh)
    number_of_channels = int(data[30:33])
    # calculate number of lines with channels
    number_of_lines = number_of_channels // 3 + (number_of_channels % 3 and 1)
    if number_of_lines < 10:
        number_of_lines = 10
    # line 2 - always empty
    data = _readline(fh)
    # line 3
    for _i in range(0, number_of_lines):
        data = _readline(fh)
    # now parse each event file channel header + data
    stream = Stream()
    for _i in range(number_of_channels):
        # get channel header
        temp = _readline(fh).decode()
        # create Stats
        header = Stats()
        header['network'] = (temp[16] + temp[19]).strip()
        header['station'] = temp[0:5].strip()
        header['location'] = (temp[7] + temp[12]).strip()
        header['channel'] = (temp[5:7] + temp[8]).strip()
        header['sampling_rate'] = float(temp[36:43])
        header['npts'] = int(temp[43:50])
        # create start and end times
        year = int(temp[9:12]) + 1900
        month = int(temp[17:19])
        day = int(temp[20:22])
        hour = int(temp[23:25])
        mins = int(temp[26:28])
        secs = float(temp[29:35])
        header['starttime'] = UTCDateTime(year, month, day, hour, mins) + secs
        if headonly:
            # skip data
            from_buffer(_readline(fh), dtype=dtype)
            stream.append(Trace(header=header))
        else:
            # fetch data
            data = from_buffer(_readline(fh), dtype=dtype)
            # convert to system byte order
            data = np.require(data, stype)
            if header['npts'] != len(data):
                msg = "Mismatching byte size %d != %d"
                warnings.warn(msg % (header['npts'], len(data)))
            stream.append(Trace(data=data, header=header))
    fh.close()
    return stream
Example #26
def _read_win(filename, century="20", **kwargs):  # @UnusedVariable
    """
    Reads a WIN file and returns a Stream object.

    .. warning::
        This function should NOT be called directly, it registers via the
        ObsPy :func:`~obspy.core.stream.read` function, call this instead.

    :type filename: str
    :param filename: WIN file to be read.
    :type century: str
    :param century: WIN stores the year as 2 digits; the century is needed to
        construct a proper datetime.
    :rtype: :class:`~obspy.core.stream.Stream`
    :returns: Stream object containing header and data.
    """
    output = {}
    srates = {}

    # read win file
    with open(filename, "rb") as fpin:
        fpin.seek(0, 2)
        sz = fpin.tell()
        fpin.seek(0)
        leng = 0
        status0 = 0
        start = 0
        while leng < sz:
            pklen = fpin.read(4)
            if len(pklen) < 4:
                break
            leng = 4
            truelen = from_buffer(pklen, native_str('>i'))[0]
            if truelen == 0:
                break
            buff = fpin.read(6)
            leng += 6

            yy = "%s%02x" % (century, ord(buff[0:1]))
            mm = "%x" % ord(buff[1:2])
            dd = "%x" % ord(buff[2:3])
            hh = "%x" % ord(buff[3:4])
            mi = "%x" % ord(buff[4:5])
            sec = "%x" % ord(buff[5:6])

            date = UTCDateTime(int(yy), int(mm), int(dd), int(hh), int(mi),
                               int(sec))
            if start == 0:
                start = date
            if status0 == 0:
                sdata = None
            while leng < truelen:
                buff = fpin.read(4)
                leng += 4
                flag = '%02x' % ord(buff[0:1])
                chanum = '%02x' % ord(buff[1:2])
                chanum = "%02s%02s" % (flag, chanum)
                datawide = int('%x' % (ord(buff[2:3]) >> 4))
                srate = ord(buff[3:4])
                xlen = (srate - 1) * datawide
                if datawide == 0:
                    # datawide == 0 flags 4-bit samples, packed two per byte
                    xlen = srate // 2
                    datawide = 0.5

                idata00 = fpin.read(4)
                leng += 4
                idata22 = from_buffer(idata00, native_str('>i'))[0]

                if chanum in output:
                    output[chanum].append(idata22)
                else:
                    output[chanum] = [
                        idata22,
                    ]
                    srates[chanum] = srate
                sdata = fpin.read(xlen)
                leng += xlen

                if len(sdata) < xlen:
                    fpin.seek(-(xlen - len(sdata)), 1)
                    sdata += fpin.read(xlen - len(sdata))
                    msg = "This shouldn't happen, it's weird..."
                    warnings.warn(msg)

                if datawide == 0.5:
                    for i in range(xlen):
                        byte = from_buffer(sdata[i:i + 1], np.int8)[0]
                        # high nibble first; note the parentheses: '+'
                        # binds tighter than '>>'
                        idata2 = output[chanum][-1] + (byte >> 4)
                        output[chanum].append(idata2)
                        # low nibble, sign-extended by the shift pair
                        idata2 = idata2 + ((byte << 4) >> 4)
                        output[chanum].append(idata2)
                elif datawide == 1:
                    for i in range((xlen // datawide)):
                        idata2 = output[chanum][-1] +\
                            from_buffer(sdata[i:i + 1], np.int8)[0]
                        output[chanum].append(idata2)
                elif datawide == 2:
                    for i in range((xlen // datawide)):
                        idata2 = output[chanum][-1] +\
                            from_buffer(sdata[2 * i:2 * (i + 1)],
                                        native_str('>h'))[0]
                        output[chanum].append(idata2)
                elif datawide == 3:
                    for i in range((xlen // datawide)):
                        # pad the 3 bytes to 4, then arithmetic-shift right
                        # by 8 to drop the pad byte and sign-extend the
                        # 24-bit value; parenthesized so the shift applies
                        # to the unpacked value, not to the sum
                        idata2 = output[chanum][-1] + \
                            (from_buffer(sdata[3 * i:3 * (i + 1)] + b'\x00',
                                         native_str('>i'))[0] >> 8)
                        output[chanum].append(idata2)
                elif datawide == 4:
                    for i in range((xlen // datawide)):
                        idata2 = output[chanum][-1] +\
                            from_buffer(sdata[4 * i:4 * (i + 1)],
                                        native_str('>i'))[0]
                        output[chanum].append(idata2)
                else:
                    msg = "DATAWIDE is %s " % datawide + \
                          "but only values of 0.5, 1, 2, 3 or 4 are supported."
                    raise NotImplementedError(msg)

    traces = []
    for i in output.keys():
        t = Trace(data=np.array(output[i]))
        t.stats.channel = str(i)
        t.stats.sampling_rate = float(srates[i])
        t.stats.starttime = start
        traces.append(t)
    return Stream(traces=traces)
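
As the docstring warns, _read_win is reached through obspy.read() rather than called directly. A short usage sketch (path hypothetical; the century keyword is forwarded to this reader):

from obspy import read

st = read("/path/to/data.win", format="WIN", century="20")
for tr in st:
    print(tr.stats.channel, tr.stats.sampling_rate, len(tr))
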
Example #39
0
def _read_y(filename, headonly=False, **kwargs):  # @UnusedVariable
    """
    Reads a Nanometrics Y file and returns an ObsPy Stream object.

    .. warning::
        This function should NOT be called directly, it registers via the
        ObsPy :func:`~obspy.core.stream.read` function, call this instead.

    :type filename: str
    :param filename: Nanometrics Y file to be read.
    :type headonly: bool, optional
    :param headonly: If set to True, read only the head. This is most useful
        for scanning available data in huge (temporary) data sets.
    :rtype: :class:`~obspy.core.stream.Stream`
    :return: A ObsPy Stream object.

    .. rubric:: Example

    >>> from obspy import read
    >>> st = read("/path/to/YAYT_BHZ_20021223.124800")
    >>> st  # doctest: +ELLIPSIS
    <obspy.core.stream.Stream object at 0x...>
    >>> print(st)  # doctest: +ELLIPSIS
    1 Trace(s) in Stream:
    .AYT..BHZ | 2002-12-23T12:48:00.000100Z - ... | 100.0 Hz, 18000 samples
    """
    # The first tag in a Y-file must be the TAG_Y_FILE (0) tag. This must be
    # followed by the following tags, in any order:
    #   TAG_STATION_INFO (1)
    #   TAG_STATION_LOCATION (2)
    #   TAG_STATION_PARAMETERS (3)
    #   TAG_STATION_DATABASE (4)
    #   TAG_SERIES_INFO (5)
    #   TAG_SERIES_DATABASE (6)
    # The following tag is optional:
    #   TAG_STATION_RESPONSE (26)
    # The last tag in the file must be a TAG_DATA_INT32 (7) tag. This tag must
    # be followed by an array of LONG's. The number of entries in the array
    # must agree with what was described in the TAG_SERIES_INFO data.
    with open(filename, 'rb') as fh:
        trace = Trace()
        trace.stats.y = AttribDict()
        count = -1
        while True:
            endian, tag_type, next_tag, _next_same = _parse_tag(fh)
            if tag_type == 1:
                # TAG_STATION_INFO
                # UCHAR Update[8]
                #   This field is only used internally for administrative
                #   purposes.  It should always be set to zeroes.
                # UCHAR Station[5] (BLANKPAD)
                #   Station is the five letter SEED format station
                #   identification.
                # UCHAR Location[2] (BLANKPAD)
                #   Location Location is the two letter SEED format location
                #   identification.
                # UCHAR Channel[3] (BLANKPAD)
                #   Channel Channel is the three letter SEED format channel
                #   identification.
                # UCHAR NetworkID[51] (ASCIIZ)
                #   This is some descriptive text identifying the network.
                # UCHAR SiteName[61] (ASCIIZ)
                #   SiteName is some text identifying the site.
                # UCHAR Comment[31] (ASCIIZ)
                #   Comment is any comment for this station.
                # UCHAR SensorType[51] (ASCIIZ)
                #   SensorType is some text describing the type of sensor used
                #   at the station.
                # UCHAR DataFormat[7] (ASCIIZ)
                #   DataFormat is some text describing the data format recorded
                #   at the station.
                data = fh.read(next_tag)
                parts = _unpack_with_asciiz_and_decode(b'5s2s3s51z61z31z51z7z',
                                                       data[8:])
                trace.stats.station = parts[0]
                trace.stats.location = parts[1]
                trace.stats.channel = parts[2]
                # extra
                params = AttribDict()
                params.network_id = parts[3]
                params.site_name = parts[4]
                params.comment = parts[5]
                params.sensor_type = parts[6]
                params.data_format = parts[7]
                trace.stats.y.tag_station_info = params
            elif tag_type == 2:
                # TAG_STATION_LOCATION
                # UCHAR Update[8]
                #   This field is only used internally for administrative
                #   purposes.  It should always be set to zeroes.
                # FLOAT Latitude
                #   Latitude in degrees of the location of the station. The
                #   latitude should be between -90 (South) and +90 (North).
                # FLOAT Longitude
                #   Longitude in degrees of the location of the station. The
                #   longitude should be between -180 (West) and +180 (East).
                # FLOAT Elevation
                #   Elevation in meters above sea level of the station.
                # FLOAT Depth
                #   Depth is the depth in meters of the sensor.
                # FLOAT Azimuth
                #   Azimuth of the sensor in degrees clockwise.
                # FLOAT Dip
                #   Dip is the dip of the sensor. 90 degrees is defined as
                #   vertical right way up.
                data = fh.read(next_tag)
                parts = _unpack_with_asciiz_and_decode(endian + b'ffffff',
                                                       data[8:])
                params = AttribDict()
                params.latitude = parts[0]
                params.longitude = parts[1]
                params.elevation = parts[2]
                params.depth = parts[3]
                params.azimuth = parts[4]
                params.dip = parts[5]
                trace.stats.y.tag_station_location = params
            elif tag_type == 3:
                # TAG_STATION_PARAMETERS
                # UCHAR Update[16]
                #   This field is only used internally for administrative
                #   purposes.  It should always be set to zeroes.
                # REALTIME StartValidTime
                #   Time that the information in these records became valid.
                # REALTIME EndValidTime
                #   Time that the information in these records became invalid.
                # FLOAT Sensitivity
                #   Sensitivity of the sensor in nanometers per bit.
                # FLOAT SensFreq
                #   Frequency at which the sensitivity was measured.
                # FLOAT SampleRate
                #   This is the number of samples per second. This value can be
                #   less than 1.0. (i.e. 0.1)
                # FLOAT MaxClkDrift
                #   Maximum drift rate of the clock in seconds per sample.
                # UCHAR SensUnits[24] (ASCIIZ)
                #   Some text indicating the units in which the sensitivity was
                #   measured.
                # UCHAR CalibUnits[24] (ASCIIZ)
                #   Some text indicating the units in which calibration input
                #   was measured.
                # UCHAR ChanFlags[27] (BLANKPAD)
                #   Text indicating the channel flags according to the SEED
                #   definition.
                # UCHAR UpdateFlag
                #   This flag must be “N” or “U” according to the SEED
                #   definition.
                # UCHAR Filler[4]
                #   Filler Pads out the record to satisfy the alignment
                #   restrictions for reading data on a SPARC processor.
                data = fh.read(next_tag)
                parts = _unpack_with_asciiz_and_decode(
                    endian + b'ddffff24z24z27sc4s', data[16:])
                trace.stats.sampling_rate = parts[4]
                # extra
                params = AttribDict()
                params.start_valid_time = parts[0]
                params.end_valid_time = parts[1]
                params.sensitivity = parts[2]
                params.sens_freq = parts[3]
                params.sample_rate = parts[4]
                params.max_clk_drift = parts[5]
                params.sens_units = parts[6]
                params.calib_units = parts[7]
                params.chan_flags = parts[8]
                params.update_flag = parts[9]
                trace.stats.y.tag_station_parameters = params
            elif tag_type == 4:
                # TAG_STATION_DATABASE
                # UCHAR Update[8]
                #   This field is only used internally for administrative
                #   purposes.  It should always be set to zeroes.
                # REALTIME LoadDate
                #   Date the information was loaded into the database.
                # UCHAR Key[16]
                #   Unique key that identifies this record in the database.
                data = fh.read(next_tag)
                parts = _unpack_with_asciiz_and_decode(endian + b'd16s',
                                                       data[8:])
                params = AttribDict()
                params.load_date = parts[0]
                params.key = parts[1]
                trace.stats.y.tag_station_database = params
            elif tag_type == 5:
                # TAG_SERIES_INFO
                # UCHAR Update[16]
                #   This field is only used internally for administrative
                #   purposes.  It should always be set to zeroes.
                # REALTIME StartTime
                #   This is start time of the data in this series.
                # REALTIME EndTime
                #   This is end time of the data in this series.
                # ULONG NumSamples
                #   This is the number of samples of data in this series.
                # LONG DCOffset
                #   DCOffset is the DC offset of the data.
                # LONG MaxAmplitude
                #   MaxAmplitude is the maximum amplitude of the data.
                # LONG MinAmplitude
                #   MinAmplitude is the minimum amplitude of the data.
                # UCHAR Format[8] (ASCIIZ)
                #   This is the format of the data. This should always be
                #   “YFILE”.
                # UCHAR FormatVersion[8] (ASCIIZ)
                #   FormatVersion is the version of the format of the data.
                #   This should always be “5.0”
                data = fh.read(next_tag)
                parts = _unpack_with_asciiz_and_decode(endian + b'ddLlll8z8z',
                                                       data[16:])
                trace.stats.starttime = UTCDateTime(parts[0])
                count = parts[2]
                # extra
                params = AttribDict()
                params.endtime = UTCDateTime(parts[1])
                params.num_samples = parts[2]
                params.dc_offset = parts[3]
                params.max_amplitude = parts[4]
                params.min_amplitude = parts[5]
                params.format = parts[6]
                params.format_version = parts[7]
                trace.stats.y.tag_series_info = params
            elif tag_type == 6:
                # TAG_SERIES_DATABASE
                # UCHAR Update[8]
                #   This field is only used internally for administrative
                #   purposes.  It should always be set to zeroes.
                # REALTIME LoadDate
                #   Date the information was loaded into the database.
                # UCHAR Key[16]
                #   Unique key that identifies this record in the database.
                data = fh.read(next_tag)
                parts = _unpack_with_asciiz_and_decode(endian + b'd16s',
                                                       data[8:])
                params = AttribDict()
                params.load_date = parts[0]
                params.key = parts[1]
                trace.stats.y.tag_series_database = params
            elif tag_type == 26:
                # TAG_STATION_RESPONSE
                # UCHAR Update[8]
                #   This field is only used internally for administrative
                #   purposes.  It should always be set to zeroes.
                # UCHAR PathName[260]
                #  PathName is the full name of the file which contains the
                #  response information for this station.
                data = fh.read(next_tag)
                parts = _unpack_with_asciiz_and_decode(b'260s', data[8:])
                params = AttribDict()
                params.path_name = parts[0]
                trace.stats.y.tag_station_response = params
            elif tag_type == 7:
                # TAG_DATA_INT32
                trace.data = from_buffer(fh.read(
                    np.dtype(np.int32).itemsize * count),
                                         dtype=np.int32)
                # break loop as TAG_DATA_INT32 should be the last tag in file
                break
            else:
                fh.seek(next_tag, 1)
    return Stream([trace])
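
Beyond the doctest above, the extra Y-file tags parsed here are collected under trace.stats.y. A small inspection sketch (assuming the corresponding tags were present in the file):

from obspy import read

st = read("/path/to/YAYT_BHZ_20021223.124800")  # path from the doctest
tr = st[0]
print(tr.stats.y.tag_series_info.num_samples)
print(tr.stats.y.tag_station_location.latitude)
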
Example #40
0
def _read_mseed(mseed_object,
                starttime=None,
                endtime=None,
                headonly=False,
                sourcename=None,
                reclen=None,
                details=False,
                header_byteorder=None,
                verbose=None,
                **kwargs):
    """
    Reads a Mini-SEED file and returns a Stream object.

    .. warning::
        This function should NOT be called directly, it registers via the
        ObsPy :func:`~obspy.core.stream.read` function, call this instead.

    :param mseed_object: Filename or open file like object that contains the
        binary Mini-SEED data. Any object that provides a read() method will be
        considered to be a file like object.
    :type starttime: :class:`~obspy.core.utcdatetime.UTCDateTime`
    :param starttime: Only read data samples after or at the start time.
    :type endtime: :class:`~obspy.core.utcdatetime.UTCDateTime`
    :param endtime: Only read data samples before or at the end time.
    :type headonly: bool, optional
    :param headonly: Determines whether or not to unpack the data or just
        read the headers.
    :type sourcename: str
    :param sourcename: Only read data with matching SEED ID (can contain
        wildcards "?" and "*", e.g. "BW.UH2.*" or "*.??Z"). Defaults to
        ``None``.
    :type reclen: int, optional
    :param reclen: If it is None, it will be automatically determined for every
        record. If it is known, just set it to the record length in bytes which
        will increase the reading speed slightly.
    :type details: bool, optional
    :param details: If ``True`` read additional information: timing quality
        and availability of calibration information.
        Note that the traces are then also split on this additional
        information, so the number of traces in a stream may change.
        Details are stored in the mseed stats AttribDict of each trace.
        A value of ``False`` means that the information is not available.
        ``blkt1001.timing_quality`` specifies the timing quality
        from 0 to 100 [%]. ``calibration_type`` specifies the type of available
        calibration information blockettes:

        - ``1``: Step Calibration (Blockette 300)
        - ``2``: Sine Calibration (Blockette 310)
        - ``3``: Pseudo-random Calibration (Blockette 320)
        - ``4``: Generic Calibration  (Blockette 390)
        - ``-2``: Calibration Abort (Blockette 395)

    :type header_byteorder: int or str, optional
    :param header_byteorder: Must be either ``0`` or ``'<'`` for LSBF or
        little-endian, ``1`` or ``'>'`` for MSBF or big-endian. ``'='`` is the
        native byte order. Used to enforce the header byte order. Useful in
        some rare cases where the automatic byte order detection fails.

    .. rubric:: Example

    >>> from obspy import read
    >>> st = read("/path/to/two_channels.mseed")
    >>> print(st)  # doctest: +ELLIPSIS
    2 Trace(s) in Stream:
    BW.UH3..EHE | 2010-06-20T00:00:00.279999Z - ... | 200.0 Hz, 386 samples
    BW.UH3..EHZ | 2010-06-20T00:00:00.279999Z - ... | 200.0 Hz, 386 samples

    >>> from obspy import UTCDateTime
    >>> st = read("/path/to/two_channels.mseed",
    ...           starttime=UTCDateTime("2010-06-20T00:00:01"),
    ...           sourcename="*.?HZ")
    >>> print(st)  # doctest: +ELLIPSIS
    1 Trace(s) in Stream:
    BW.UH3..EHZ | 2010-06-20T00:00:00.999999Z - ... | 200.0 Hz, 242 samples

    Read with ``details=True`` to read more details of the file if present.

    >>> st = read("/path/to/timingquality.mseed", details=True)
    >>> print(st[0].stats.mseed.blkt1001.timing_quality)
    55

    ``False`` means that the necessary information could not be found in the
    file.

    >>> print(st[0].stats.mseed.calibration_type)
    False

    Note that each change in timing quality from record to record may trigger a
    new Trace object to be created so the Stream object may contain many Trace
    objects if ``details=True`` is used.

    >>> print(len(st))
    101
    """
    # Parse the headonly and reclen flags.
    if headonly is True:
        unpack_data = 0
    else:
        unpack_data = 1
    if reclen is None:
        reclen = -1
    elif reclen not in VALID_RECORD_LENGTHS:
        msg = 'Invalid record length. Autodetection will be used.'
        warnings.warn(msg)
        reclen = -1

    # Determine the byte order.
    if header_byteorder == "=":
        header_byteorder = NATIVE_BYTEORDER

    if header_byteorder is None:
        header_byteorder = -1
    elif header_byteorder in [0, "0", "<"]:
        header_byteorder = 0
    elif header_byteorder in [1, "1", ">"]:
        header_byteorder = 1

    # Parse some information about the file.
    if header_byteorder == 0:
        bo = "<"
    elif header_byteorder > 0:
        bo = ">"
    else:
        bo = None

    # Determine the total size. Either it's a file-like object.
    if hasattr(mseed_object, "tell") and hasattr(mseed_object, "seek"):
        cur_pos = mseed_object.tell()
        mseed_object.seek(0, 2)
        length = mseed_object.tell() - cur_pos
        mseed_object.seek(cur_pos, 0)
    # Or a file name.
    else:
        length = os.path.getsize(mseed_object)

    if length < 128:
        msg = "The smallest possible mini-SEED record is made up of 128 " \
              "bytes. The passed buffer or file contains only %i." % length
        raise ObsPyMSEEDFilesizeTooSmallError(msg)
    elif length > 2**31:
        msg = ("ObsPy can currently not directly read mini-SEED files that "
               "are larger than 2^31 bytes (2048 MiB). To still read it, "
               "please read the file in chunks as documented here: "
               "https://github.com/obspy/obspy/pull/1419"
               "#issuecomment-221582369")
        raise ObsPyMSEEDFilesizeTooLargeError(msg)

    info = util.get_record_information(mseed_object, endian=bo)

    # Map the encoding to a readable string value.
    if "encoding" not in info:
        # Hopefully detected by libmseed.
        info["encoding"] = None
    elif info["encoding"] in ENCODINGS:
        info['encoding'] = ENCODINGS[info['encoding']][0]
    elif info["encoding"] in UNSUPPORTED_ENCODINGS:
        msg = ("Encoding '%s' (%i) is not supported by ObsPy. Please send "
               "the file to the ObsPy developers so that we can add "
               "support for it.") % \
            (UNSUPPORTED_ENCODINGS[info['encoding']], info['encoding'])
        raise ValueError(msg)
    else:
        msg = "Encoding '%i' is not a valid MiniSEED encoding." % \
            info['encoding']
        raise ValueError(msg)

    record_length = info["record_length"]

    # Only keep information relevant for the whole file.
    info = {'filesize': info['filesize']}

    # If it's a file name just read it.
    if isinstance(mseed_object, (str, native_str)):
        # Read to NumPy array which is used as a buffer.
        bfr_np = np.fromfile(mseed_object, dtype=np.int8)
    elif hasattr(mseed_object, 'read'):
        bfr_np = from_buffer(mseed_object.read(), dtype=np.int8)

    # Search for data records and pass only the data part to the underlying C
    # routine.
    offset = 0
    # 0 to 9 are defined in a row in the ASCII charset.
    min_ascii = ord('0')

    # Small function to check whether an array of ASCII values contains only
    # digits.
    def isdigit(x):
        return bool((x - min_ascii).max() <= 9)

    while True:
        # This should never happen
        if not isdigit(bfr_np[offset:offset + 6]) or \
                bfr_np[offset + 6] not in VALID_CONTROL_HEADERS:
            msg = 'Not a valid (Mini-)SEED file'
            raise Exception(msg)
        elif bfr_np[offset + 6] in SEED_CONTROL_HEADERS:
            offset += record_length
            continue
        break
    bfr_np = bfr_np[offset:]
    buflen = len(bfr_np)

    # If no selection is given pass None to the C function.
    if starttime is None and endtime is None and sourcename is None:
        selections = None
    else:
        select_time = SelectTime()
        selections = Selections()
        selections.timewindows.contents = select_time
        if starttime is not None:
            if not isinstance(starttime, UTCDateTime):
                msg = 'starttime needs to be a UTCDateTime object'
                raise ValueError(msg)
            selections.timewindows.contents.starttime = \
                util._convert_datetime_to_mstime(starttime)
        else:
            # HPTERROR results in no starttime.
            selections.timewindows.contents.starttime = HPTERROR
        if endtime is not None:
            if not isinstance(endtime, UTCDateTime):
                msg = 'endtime needs to be a UTCDateTime object'
                raise ValueError(msg)
            selections.timewindows.contents.endtime = \
                util._convert_datetime_to_mstime(endtime)
        else:
            # HPTERROR results in no starttime.
            selections.timewindows.contents.endtime = HPTERROR
        if sourcename is not None:
            if not isinstance(sourcename, (str, native_str)):
                msg = 'sourcename needs to be a string'
                raise ValueError(msg)
            # libmseed uses underscores as separators and allows filtering
            # after the dataquality which is disabled here to not confuse
            # users. (* == all data qualities)
            selections.srcname = (sourcename.replace('.', '_') + '_*').\
                encode('ascii', 'ignore')
        else:
            selections.srcname = b'*'
    all_data = []

    # Use a callback function to allocate the memory and keep track of the
    # data.
    def allocate_data(samplecount, sampletype):
        # Enhanced sanity checking for libmseed 2.10 can result in the
        # sampletype not being set. Just return an empty array in this case.
        if sampletype == b"\x00":
            data = np.empty(0)
        else:
            data = np.empty(samplecount, dtype=DATATYPES[sampletype])
        all_data.append(data)
        return data.ctypes.data

    # XXX: Do this properly!
    # Define Python callback function for use in C function. Return a long so
    # it hopefully works on 32 and 64 bit systems.
    alloc_data = C.CFUNCTYPE(C.c_longlong, C.c_int, C.c_char)(allocate_data)

    try:
        verbose = int(verbose)
    except Exception:
        verbose = 0

    clibmseed.verbose = bool(verbose)
    try:
        lil = clibmseed.readMSEEDBuffer(bfr_np, buflen, selections,
                                        C.c_int8(unpack_data), reclen,
                                        C.c_int8(verbose), C.c_int8(details),
                                        header_byteorder, alloc_data)
    except InternalMSEEDError as e:
        msg = e.args[0]
        if offset and str(offset) in msg:
            # Append the offset of the full SEED header if necessary. That way
            # the C code does not have to deal with it.
            if offset and "offset" in msg:
                msg = ("%s\nThe file contains a %i byte dataless part at the "
                       "beginning. Make sure to add that to the reported "
                       "offset to get the actual location in the file." %
                       (msg, offset))
                raise InternalMSEEDError(msg)
        else:
            raise
    finally:
        # Make sure to reset the verbosity.
        clibmseed.verbose = True

    del selections

    traces = []
    try:
        current_id = lil.contents
    # Return an empty stream if no traces are found.
    except ValueError:
        clibmseed.lil_free(lil)
        del lil
        return Stream()

    while True:
        # Init header with the essential information.
        header = {
            'network': current_id.network.strip(),
            'station': current_id.station.strip(),
            'location': current_id.location.strip(),
            'channel': current_id.channel.strip(),
            'mseed': {
                'dataquality': current_id.dataquality
            }
        }
        # Loop over segments.
        try:
            current_segment = current_id.firstSegment.contents
        except ValueError:
            break
        while True:
            header['sampling_rate'] = current_segment.samprate
            header['starttime'] = \
                util._convert_mstime_to_datetime(current_segment.starttime)
            header['mseed']['number_of_records'] = current_segment.recordcnt
            header['mseed']['encoding'] = \
                ENCODINGS[current_segment.encoding][0]
            header['mseed']['byteorder'] = \
                "<" if current_segment.byteorder == 0 else ">"
            header['mseed']['record_length'] = current_segment.reclen
            if details:
                timing_quality = current_segment.timing_quality
                if timing_quality == 0xFF:  # 0xFF is mask for not known timing
                    timing_quality = False
                header['mseed']['blkt1001'] = {}
                header['mseed']['blkt1001']['timing_quality'] = timing_quality
                header['mseed']['calibration_type'] = \
                    current_segment.calibration_type \
                    if current_segment.calibration_type != -1 else False

            if headonly is False:
                # The data will always be in sequential order.
                data = all_data.pop(0)
                header['npts'] = len(data)
            else:
                data = np.array([])
                header['npts'] = current_segment.samplecnt
            # Make sure to init the number of samples.
            # Py3k: convert to unicode
            header['mseed'] = dict(
                (k, v.decode()) if isinstance(v, bytes) else (k, v)
                for k, v in header['mseed'].items())
            header = dict((k, v.decode()) if isinstance(v, bytes) else (k, v)
                          for k, v in header.items())
            trace = Trace(header=header, data=data)
            # Append global information.
            for key, value in info.items():
                setattr(trace.stats.mseed, key, value)
            traces.append(trace)
            # A Null pointer access results in a ValueError
            try:
                current_segment = current_segment.next.contents
            except ValueError:
                break
        try:
            current_id = current_id.next.contents
        except ValueError:
            break

    clibmseed.lil_free(lil)  # NOQA
    del lil  # NOQA
    return Stream(traces=traces)
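
A quick sketch of the headonly path described above, which skips sample unpacking and fills npts from the record headers (file path reused from the doctest):

from obspy import read

st = read("/path/to/two_channels.mseed", headonly=True)
for tr in st:
    # data arrays stay empty, but header-derived fields are populated
    print(tr.id, tr.stats.npts, tr.stats.mseed.record_length)
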
Example #41
0
def _read_q(filename, headonly=False, data_directory=None, byteorder='=',
            **kwargs):  # @UnusedVariable
    """
    Reads a Seismic Handler Q file and returns an ObsPy Stream object.

    .. warning::
        This function should NOT be called directly, it registers via the
        ObsPy :func:`~obspy.core.stream.read` function, call this instead.

    :type filename: str
    :param filename: Q header file to be read. Must have a `QHD` file
        extension.
    :type headonly: bool, optional
    :param headonly: If set to True, read only the head. This is most useful
        for scanning available data in huge (temporary) data sets.
    :type data_directory: str, optional
    :param data_directory: Data directory where the corresponding QBN file can
        be found.
    :type byteorder: str, optional
    :param byteorder: Enforce byte order for data file. This is important for
        Q files written in older versions of Seismic Handler, which don't
        explicit state the `BYTEORDER` flag within the header file. Can be
        little endian (``'<'``), big endian (``'>'``), or native byte order
        (``'='``). Defaults to ``'='``.
    :rtype: :class:`~obspy.core.stream.Stream`
    :return: A ObsPy Stream object.

    Q files consist of two files per data set:

    * an ASCII header file with file extension `QHD` and
    * a binary data file with file extension `QBN`.

    The read method only accepts header files for the ``filename`` parameter.
    ObsPy assumes that the corresponding data file is within the same directory
    if the ``data_directory`` parameter is not set. Otherwise it will search
    in the given ``data_directory`` for a file with the `QBN` file extension.

    .. rubric:: Example

    >>> from obspy import read
    >>> st = read("/path/to/QFILE-TEST.QHD")
    >>> st    #doctest: +ELLIPSIS
    <obspy.core.stream.Stream object at 0x...>
    >>> print(st)  # doctest: +ELLIPSIS
    3 Trace(s) in Stream:
    .TEST..BHN | 2009-10-01T12:46:01.000000Z - ... | 20.0 Hz, 801 samples
    .TEST..BHE | 2009-10-01T12:46:01.000000Z - ... | 20.0 Hz, 801 samples
    .WET..HHZ  | 2010-01-01T01:01:05.999000Z - ... | 100.0 Hz, 4001 samples
    """
    if not headonly:
        if not data_directory:
            data_file = os.path.splitext(filename)[0] + '.QBN'
        else:
            data_file = os.path.basename(os.path.splitext(filename)[0])
            data_file = os.path.join(data_directory, data_file + '.QBN')
        if not os.path.isfile(data_file):
            msg = "Can't find corresponding QBN file at %s."
            raise IOError(msg % data_file)
        fh_data = open(data_file, 'rb')
    # loop through read header file
    with open(filename, 'rt') as fh:
        lines = fh.read().splitlines()
    # number of comment lines
    cmtlines = int(lines[0][5:7])
    # trace lines
    traces = {}
    i = -1
    id = ''
    for line in lines[cmtlines:]:
        cid = int(line[0:2])
        if cid != id:
            id = cid
            i += 1
        traces.setdefault(i, '')
        traces[i] += line[3:]
    # create stream object
    stream = Stream()
    for id in sorted(traces.keys()):
        # fetch headers
        header = {}
        header['sh'] = {
            "FROMQ": True,
            "FILE": os.path.splitext(os.path.split(filename)[1])[0],
        }
        channel = ['', '', '']
        npts = 0
        for item in traces[id].split('~'):
            key = item.lstrip()[0:4]
            value = item.lstrip()[5:]
            if key == 'L001':
                npts = header['npts'] = int(value)
            elif key == 'L000':
                continue
            elif key == 'R000':
                header['delta'] = float(value)
            elif key == 'R026':
                header['calib'] = float(value)
            elif key == 'S001':
                header['station'] = value
            elif key == 'C000' and value:
                channel[2] = value[0]
            elif key == 'C001' and value:
                channel[0] = value[0]
            elif key == 'C002' and value:
                channel[1] = value[0]
            elif key == 'C003':
                if value == '<' or value == '>':
                    byteorder = header['sh']['BYTEORDER'] = value
            elif key == 'S021':
                # 01-JAN-2009_01:01:01.0
                # 1-OCT-2009_12:46:01.000
                header['starttime'] = to_utcdatetime(value)
            elif key == 'S022':
                header['sh']['P-ONSET'] = to_utcdatetime(value)
            elif key == 'S023':
                header['sh']['S-ONSET'] = to_utcdatetime(value)
            elif key == 'S024':
                header['sh']['ORIGIN'] = to_utcdatetime(value)
            elif key:
                key = INVERTED_SH_IDX.get(key, key)
                if key in SH_KEYS_INT:
                    header['sh'][key] = int(value)
                elif key in SH_KEYS_FLOAT:
                    header['sh'][key] = float(value)
                else:
                    header['sh'][key] = value
        # set channel code
        header['channel'] = ''.join(channel)
        # remember record number
        header['sh']['RECNO'] = len(stream) + 1
        if headonly:
            # skip data
            stream.append(Trace(header=header))
        else:
            if not npts:
                stream.append(Trace(header=header))
                continue
            # read data
            data = fh_data.read(npts * 4)
            dtype = native_str(byteorder + 'f4')
            data = from_buffer(data, dtype=dtype)
            # convert to system byte order
            data = np.require(data, native_str('=f4'))
            stream.append(Trace(data=data, header=header))
    if not headonly:
        fh_data.close()
    return stream
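
A usage sketch exercising the data_directory and byteorder keywords documented above (paths hypothetical):

from obspy import read

st = read("/path/to/QFILE-TEST.QHD", data_directory="/path/to/qbn",
          byteorder='<')
print(st)
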
Example #42
0
    def test_pack_segy_data(self):
        """
        Tests the packing of various SEG Y files.
        """
        # Loop over all files.
        for file, attribs in self.files.items():
            # Get some attributes.
            data_format = attribs['data_sample_enc']
            endian = attribs['endian']
            count = attribs['sample_count']
            size = attribs['sample_size']
            non_normalized_samples = attribs['non_normalized_samples']
            dtype = self.dtypes[data_format]
            file = os.path.join(self.path, file)
            # Load the data. This data has previously been unpacked by
            # Madagascar.
            data = np.load(file + '.npy').ravel()
            data = np.require(data, dtype)
            # Load the packed data.
            with open(file, 'rb') as f:
                # Jump to the beginning of the data.
                f.seek(3200 + 400 + 240)
                packed_data = f.read(count * size)
            # The pack functions all write to file objects.
            f = io.BytesIO()
            # Pack the data.
            DATA_SAMPLE_FORMAT_PACK_FUNCTIONS[data_format](f, data, endian)
            # Read again.
            f.seek(0, 0)
            new_packed_data = f.read()
            # Check the length.
            self.assertEqual(len(packed_data), len(new_packed_data))
            if len(non_normalized_samples) == 0:
                # The packed data should be totally identical.
                self.assertEqual(packed_data, new_packed_data)
            else:
                # Some test files contain non normalized IBM floating point
                # data. These cannot be reproduced exactly.
                # Just a sanity check to be sure it is only IBM floating point
                # data that does not work completely.
                self.assertEqual(data_format, 1)

                # Read the data as uint8 to be able to directly access the
                # different bytes.
                # Original data.
                packed_data = from_buffer(packed_data, np.uint8)
                # Newly written.
                new_packed_data = from_buffer(new_packed_data, np.uint8)

                # Figure out the non normalized fractions in the original data
                # because these cannot be compared directly.
                # Get the position of the first byte of the fraction depending
                # on the endianness.
                if endian == '>':
                    start = 1
                else:
                    start = 2
                # The first byte of the fraction.
                first_fraction_byte_old = packed_data[start::4]
                # First get all zeros in the original data because zeros have
                # to be treated differently.
                zeros = np.where(data == 0)[0]
                # Create a copy and set the zeros to a high number to be able
                # to find all non normalized numbers.
                fraction_copy = first_fraction_byte_old.copy()
                fraction_copy[zeros] = 255
                # Normalized numbers will have no zeros in the first 4 bit of
                # the fraction. This means that the most significant byte of
                # the fraction has to be at least 16 for it to be normalized.
                non_normalized = np.where(fraction_copy < 16)[0]

                # Sanity check if the file data and the calculated data are the
                # same.
                np.testing.assert_array_equal(non_normalized,
                                              np.array(non_normalized_samples))

                # Test all other parts of the packed data. Set dtype to int32
                # to get 4 byte numbers.
                packed_data_copy = packed_data.copy()
                new_packed_data_copy = new_packed_data.copy()
                packed_data_copy.dtype = np.int32
                new_packed_data_copy.dtype = np.int32
                # Equalize the non normalized parts.
                packed_data_copy[non_normalized] = \
                    new_packed_data_copy[non_normalized]
                np.testing.assert_array_equal(packed_data_copy,
                                              new_packed_data_copy)

                # Now check the non normalized parts if they are almost the
                # same.
                data = data[non_normalized]
                # Unpack the data again.
                new_packed_data.dtype = np.int32
                new_packed_data = new_packed_data[non_normalized]
                length = len(new_packed_data)
                f = io.BytesIO()
                f.write(new_packed_data.tostring())
                f.seek(0, 0)
                new_data = DATA_SAMPLE_FORMAT_UNPACK_FUNCTIONS[1](
                    f, length, endian)
                f.close()
                packed_data.dtype = np.int32
                packed_data = packed_data[non_normalized]
                length = len(packed_data)
                f = io.BytesIO()
                f.write(packed_data.tostring())
                f.seek(0, 0)
                old_data = DATA_SAMPLE_FORMAT_UNPACK_FUNCTIONS[1](
                    f, length, endian)
                f.close()
                # This works because the normalized and the non normalized IBM
                # floating point numbers will be close enough for the internal
                # IEEE representation to be identical.
                np.testing.assert_array_equal(data, new_data)
                np.testing.assert_array_equal(data, old_data)
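
The normalization argument in this test is easier to follow with the IBM single-precision layout written out: one sign bit, a 7-bit base-16 exponent biased by 64, and a 24-bit fraction. A value is "normalized" when the top hex digit of the fraction is nonzero, i.e. the first fraction byte is at least 16, which is exactly the threshold checked above. A minimal decoder sketch (not ObsPy's implementation):

def ibm32_to_float(b):
    # decode one big-endian IBM System/360 single-precision float
    u = int.from_bytes(b, 'big')
    sign = -1.0 if u >> 31 else 1.0
    exponent = (u >> 24) & 0x7f       # base-16 exponent, bias 64
    fraction = u & 0x00ffffff         # 24-bit hex fraction
    return sign * (fraction / 2.0 ** 24) * 16.0 ** (exponent - 64)

print(ibm32_to_float(b'\x42\x76\xa0\x00'))  # 118.625
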
Example #44
0
def read_wave_server_v(server, port, scnl, start, end, timeout=None,
                       cleanup=False):
    """
    Reads data for specified time interval and scnl on specified waveserverV.

    Returns a list of TraceBuf2 objects.
    """
    rid = 'rwserv'
    scnlstr = '%s %s %s %s' % scnl
    reqstr = 'GETSCNLRAW: %s %s %f %f\n' % (rid, scnlstr, start, end)
    sock = send_sock_req(server, port, reqstr.encode('ascii', 'strict'),
                         timeout=timeout)
    r = get_sock_char_line(sock, timeout=timeout)
    if not r:
        return []
    tokens = str(r.decode()).split()
    flag = tokens[6]
    if flag != 'F':
        msg = 'read_wave_server_v returned flag %s - %s'
        print(msg % (flag, RETURNFLAG_KEY[flag]), file=sys.stderr)
        return []
    nbytes = int(tokens[-1])
    dat = get_sock_bytes(sock, nbytes, timeout=timeout)
    sock.close()

    tbl = []
    p = 0
    dat_len = len(dat)
    current_tb = None
    period = None
    bufs = None

    while p < dat_len:
        if dat_len <= p + 64:
            break  # no tracebufs left

        new_tb = TraceBuf2()
        new_tb.parse_header(dat[p:p + 64])
        p += 64
        nbytes = new_tb.ndata * new_tb.inputType.itemsize

        if dat_len < p + nbytes:
            break   # not enough array to hold data specified in header

        if current_tb is not None:
            if cleanup and new_tb.start - current_tb.end == period:
                buf = dat[p:p + nbytes]
                bufs.append(from_buffer(buf, current_tb.inputType))
                current_tb.end = new_tb.end

            else:
                if len(bufs) > 1:
                    current_tb.data = np.concatenate(bufs)
                else:
                    current_tb.data = bufs[0]

                current_tb.ndata = len(current_tb.data)
                current_tb = None

        if current_tb is None:
            current_tb = new_tb
            tbl.append(current_tb)
            period = 1 / current_tb.rate
            bufs = [from_buffer(dat[p:p + nbytes], current_tb.inputType)]

        p += nbytes

    # finalize the last tracebuf, if any data was read
    if bufs:
        if len(bufs) > 1:
            current_tb.data = np.concatenate(bufs)
        else:
            current_tb.data = bufs[0]
        current_tb.ndata = len(current_tb.data)

    return tbl
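
A hypothetical call to the reader above; scnl follows the Earthworm station/channel/network/location order used to build the GETSCNLRAW request, and start/end are epoch seconds (server address and SCNL are made up):

tbl = read_wave_server_v('127.0.0.1', 16022, ('STA', 'EHZ', 'XX', '--'),
                         start=1262304000.0, end=1262304060.0,
                         timeout=5.0, cleanup=True)
for tb in tbl:
    print(tb.ndata, tb.data[:5])
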
Example #45
0
def read_sac_ascii(source, headonly=False):
    """
    Read a SAC ASCII/Alphanumeric file.

    :param source: Full path or File-like object from a SAC ASCII file on disk.
    :type source: str or file
    :param headonly: If headonly is True, return the header arrays not the
        data array.  Note, the entire file is still read in if headonly=True.
    :type headonly: bool

    :return: The float, integer, and string header arrays, and data array,
        in that order. Data array will be None if headonly is True.
    :rtype: :class:`numpy.ndarray`

    """
    # TODO: make headonly=True only read part of the file, not all of it.
    # checks: ASCII-ness, header array length, npts matches data length
    try:
        fh = open(source, 'rb')
        is_file_name = True
    except TypeError:
        fh = source
        is_file_name = False
    except IOError:
        raise SacIOError("No such file: " + source)
    # read outside of try/finally: a finally clause would also run after
    # the IOError above, where fh may be unbound
    contents = fh.read()
    if is_file_name:
        fh.close()

    contents = [_i.rstrip(b"\n\r") for _i in contents.splitlines()]
    if len(contents) < 14 + 8 + 8:
        raise SacIOError("%s is not a valid SAC file:" % fh.name)

    # --------------------------------------------------------------
    # parse the header
    #
    # The sac header has 70 floats, 40 integers, then 192 bytes
    #    in strings. Store them in array (and convert the char to a
    #    list). That's a total of 632 bytes.
    # --------------------------------------------------------------
    # read in the float values
    # TODO: use native '=' dtype byteorder instead of forcing little endian?
    hf = np.array([i.split() for i in contents[:14]],
                  dtype=native_str('<f4')).ravel()
    # read in the int values
    hi = np.array([i.split() for i in contents[14: 14 + 8]],
                  dtype=native_str('<i4')).ravel()
    # reading in the string part is a bit more complicated
    # because every string field has to be 8 characters long
    # apart from the second field which is 16 characters long
    # resulting in a total length of 192 characters
    hs, = init_header_arrays(arrays=('str',))
    for i, j in enumerate(range(0, 24, 3)):
        line = contents[14 + 8 + i]
        hs[j:j + 3] = from_buffer(line[:24], dtype=native_str('|S8'))
    # --------------------------------------------------------------
    # read in the seismogram points
    # --------------------------------------------------------------
    if headonly:
        data = None
    else:
        data = np.array([i.split() for i in contents[30:]],
                        dtype=native_str('<f4')).ravel()

        npts = hi[HD.INTHDRS.index('npts')]
        if len(data) != npts:
            raise SacIOError("Cannot read all data points")

    return hf, hi, hs, data
Example #46
0
def read_sac_ascii(source, headonly=False):
    """
    Read a SAC ASCII/Alphanumeric file.

    :param source: Full path or File-like object from a SAC ASCII file on disk.
    :type source: str or file
    :param headonly: If headonly is True, return the header arrays not the
        data array.  Note, the entire file is still read in if headonly=True.
    :type headonly: bool

    :return: The float, integer, and string header arrays, and data array,
        in that order. Data array will be None if headonly is True.
    :rtype: :class:`numpy.ndarray`

    """
    # TODO: make headonly=True only read part of the file, not all of it.
    # checks: ASCII-ness, header array length, npts matches data length
    try:
        fh = open(source, 'rb')
        is_file_name = True
    except TypeError:
        fh = source
        is_file_name = False
    except IOError:
        raise SacIOError("No such file: " + source)
    # read outside of try/finally: a finally clause would also run after
    # the IOError above, where fh may be unbound
    contents = fh.read()
    if is_file_name:
        fh.close()

    contents = [_i.rstrip(b"\n\r") for _i in contents.splitlines()]
    if len(contents) < 14 + 8 + 8:
        raise SacIOError("%s is not a valid SAC file:" % fh.name)

    # --------------------------------------------------------------
    # parse the header
    #
    # The sac header has 70 floats, 40 integers, then 192 bytes
    #    in strings. Store them in array (and convert the char to a
    #    list). That's a total of 632 bytes.
    # --------------------------------------------------------------
    # read in the float values
    # TODO: use native '=' dtype byteorder instead of forcing little endian?
    hf = np.array([i.split() for i in contents[:14]], dtype='<f4').ravel()
    # read in the int values
    hi = np.array([i.split() for i in contents[14:14 + 8]],
                  dtype='<i4').ravel()
    # reading in the string part is a bit more complicated
    # because every string field has to be 8 characters long
    # apart from the second field which is 16 characters long
    # resulting in a total length of 192 characters
    hs, = init_header_arrays(arrays=('str', ))
    for i, j in enumerate(range(0, 24, 3)):
        line = contents[14 + 8 + i]
        hs[j:j + 3] = from_buffer(line[:24], dtype='|S8')
    # --------------------------------------------------------------
    # read in the seismogram points
    # --------------------------------------------------------------
    if headonly:
        data = None
    else:
        data = np.array([i.split() for i in contents[30:]],
                        dtype='<f4').ravel()

        npts = hi[HD.INTHDRS.index('npts')]
        if len(data) != npts:
            raise SacIOError("Cannot read all data points")

    return hf, hi, hs, data
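
A short sketch of calling this low-level reader directly (path made up; assumes this module's namespace). The returned arrays follow the 70-float / 40-int / 24x8-char header layout described in the comments above:

hf, hi, hs, data = read_sac_ascii("/path/to/file.sac.ascii")
print(len(hf), len(hi), len(hs))  # 70, 40, 24
if data is not None:
    print(data.dtype, len(data))
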
Example #47
0
def read_sac(source, headonly=False, byteorder=None, checksize=False):
    """
    Read a SAC binary file.

    :param source: Full path string or File-like object from a SAC binary file
        on disk.  If it is an open file object, it must be opened in 'rb' mode.
    :type source: str or file
    :param headonly: If headonly is True, only read the header arrays not the
        data array.
    :type headonly: bool
    :param byteorder: If omitted or None, automatic byte-order checking is
        done, starting with native order. If byteorder is specified and
        incorrect, a SacIOError is raised.
    :type byteorder: str {'little', 'big'}, optional
    :param checksize: If True, check that the theoretical file size from the
        header matches the size on disk.
    :type checksize: bool

    :return: The float, integer, and string header arrays, and data array,
        in that order. Data array will be None if headonly is True.
    :rtype: tuple of :class:`numpy.ndarray`

    :raises: :class:`ValueError` if the byte order is unrecognized.
        :class:`IOError` if the file is not found, the specified byteorder is
        incorrect, the theoretical file size doesn't match the header, or the
        header arrays have an incorrect length.

    """
    # TODO: rewrite using "with" statement instead of open/close management.
    # check byte order, header array length, file size, npts == data length
    try:
        f = open(source, 'rb')
        is_file_name = True
    except TypeError:
        # source is already a file-like object
        f = source
        is_file_name = False

    is_byteorder_specified = byteorder is not None
    if not is_byteorder_specified:
        byteorder = sys.byteorder

    if byteorder == 'little':
        endian_str = '<'
    elif byteorder == 'big':
        endian_str = '>'
    else:
        raise ValueError("Unrecognized byteorder. Use {'little', 'big'}")

    # --------------------------------------------------------------
    # READ HEADER
    # The sac header has 70 floats, 40 integers, then 192 bytes
    #    in strings. Store them in array (and convert the char to a
    #    list). That's a total of 632 bytes.
    # --------------------------------------------------------------
    hf = from_buffer(f.read(4 * 70), dtype=native_str(endian_str + 'f4'))
    hi = from_buffer(f.read(4 * 40), dtype=native_str(endian_str + 'i4'))
    hs = from_buffer(f.read(24 * 8), dtype=native_str('|S8'))

    if not is_valid_byteorder(hi):
        if is_byteorder_specified:
            if is_file_name:
                f.close()
            # The byteorder was specified but is not valid.
            raise SacIOError("Incorrect byteorder {}".format(byteorder))
        else:
            # not valid, but not specified.
            # swap the dtype interpretation (dtype.byteorder), but keep the
            # bytes, so the arrays in memory reflect the bytes on disk
            hf = hf.newbyteorder('S')
            hi = hi.newbyteorder('S')

    # we now have correct headers, let's use their correct byte order.
    endian_str = hi.dtype.byteorder

    # check header lengths
    if len(hf) != 70 or len(hi) != 40 or len(hs) != 24:
        hf = hi = hs = None
        if is_file_name:
            f.close()
        raise SacIOError("Cannot read all header values")

    npts = hi[HD.INTHDRS.index('npts')]

    # check file size
    if checksize:
        cur_pos = f.tell()
        f.seek(0, os.SEEK_END)
        length = f.tell()
        f.seek(cur_pos, os.SEEK_SET)
        th_length = (632 + 4 * int(npts))
        if length != th_length:
            if is_file_name:
                f.close()
            msg = "Actual and theoretical file size are inconsistent.\n" \
                  "Actual/Theoretical: {}/{}\n" \
                  "Check that headers are consistent with time series."
            raise SacIOError(msg.format(length, th_length))

    # --------------------------------------------------------------
    # READ DATA
    # --------------------------------------------------------------
    if headonly:
        data = None
    else:
        data = from_buffer(f.read(int(npts) * 4),
                           dtype=native_str(endian_str + 'f4'))

        if len(data) != npts:
            if is_file_name:
                f.close()
            raise SacIOError("Cannot read all data points")

    if is_file_name:
        f.close()

    return hf, hi, hs, data
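
A short sketch of the byte-order handling above, assuming a hypothetical big-endian SAC binary file; SacIOError is the module's exception as in the listing. The returned arrays keep the on-disk byte order, since only the dtype interpretation is swapped:

# 'be_example.sac' is a hypothetical big-endian SAC binary file.
hf, hi, hs, data = read_sac("be_example.sac")    # byte order auto-detected
print(hi.dtype.byteorder)                        # '>' on a little-endian host

# Forcing the wrong byte order raises SacIOError instead of guessing.
try:
    read_sac("be_example.sac", byteorder="little")
except SacIOError as e:
    print(e)

# checksize=True additionally verifies 632 + 4 * npts == size on disk.
hf, hi, hs, data = read_sac("be_example.sac", checksize=True)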
Example #48
0
def _read_mseed(mseed_object, starttime=None, endtime=None, headonly=False,
                sourcename=None, reclen=None, details=False,
                header_byteorder=None, verbose=None, **kwargs):
    """
    Reads a Mini-SEED file and returns a Stream object.

    .. warning::
        This function should NOT be called directly, it registers via the
        ObsPy :func:`~obspy.core.stream.read` function, call this instead.

    :param mseed_object: Filename or open file-like object that contains the
        binary Mini-SEED data. Any object that provides a read() method will
        be considered to be a file-like object.
    :type starttime: :class:`~obspy.core.utcdatetime.UTCDateTime`
    :param starttime: Only read data samples after or at the start time.
    :type endtime: :class:`~obspy.core.utcdatetime.UTCDateTime`
    :param endtime: Only read data samples before or at the end time.
    :type headonly: bool
    :param headonly: Determines whether to unpack the data or just read the
        headers.
    :type sourcename: str
    :param sourcename: Only read data with matching SEED ID (can contain
        wildcards "?" and "*", e.g. "BW.UH2.*" or "*.??Z"). Defaults to
        ``None``.
    :type reclen: int, optional
    :param reclen: If it is None, it will be automatically determined for
        every record. If it is known, just set it to the record length in
        bytes, which will increase the reading speed slightly.
    :type details: bool, optional
    :param details: If ``True`` read additional information: timing quality
        and availability of calibration information.
        Note that the traces are then also split on this additional
        information, so the number of traces in a stream will change.
        Details are stored in the mseed stats AttribDict of each trace.
        ``False`` specifies for both cases, that this information is not
        available. ``blkt1001.timing_quality`` specifies the timing quality
        from 0 to 100 [%]. ``calibration_type`` specifies the type of available
        calibration information blockettes:

        - ``1``: Step Calibration (Blockette 300)
        - ``2``: Sine Calibration (Blockette 310)
        - ``3``: Pseudo-random Calibration (Blockette 320)
        - ``4``: Generic Calibration  (Blockette 390)
        - ``-2``: Calibration Abort (Blockette 395)

    :type header_byteorder: int or str, optional
    :param header_byteorder: Must be either ``0`` or ``'<'`` for LSBF
        (little-endian), or ``1`` or ``'>'`` for MSBF (big-endian). ``'='`` is the
        native byte order. Used to enforce the header byte order. Useful in
        some rare cases where the automatic byte order detection fails.

    .. rubric:: Example

    >>> from obspy import read
    >>> st = read("/path/to/two_channels.mseed")
    >>> print(st)  # doctest: +ELLIPSIS
    2 Trace(s) in Stream:
    BW.UH3..EHE | 2010-06-20T00:00:00.279999Z - ... | 200.0 Hz, 386 samples
    BW.UH3..EHZ | 2010-06-20T00:00:00.279999Z - ... | 200.0 Hz, 386 samples

    >>> from obspy import UTCDateTime
    >>> st = read("/path/to/two_channels.mseed",
    ...           starttime=UTCDateTime("2010-06-20T00:00:01"),
    ...           sourcename="*.?HZ")
    >>> print(st)  # doctest: +ELLIPSIS
    1 Trace(s) in Stream:
    BW.UH3..EHZ | 2010-06-20T00:00:00.999999Z - ... | 200.0 Hz, 242 samples

    Read with ``details=True`` to read more details of the file if present.

    >>> st = read("/path/to/timingquality.mseed", details=True)
    >>> print(st[0].stats.mseed.blkt1001.timing_quality)
    55

    ``False`` means that the necessary information could not be found in the
    file.

    >>> print(st[0].stats.mseed.calibration_type)
    False

    Note that each change in timing quality from record to record may trigger a
    new Trace object to be created so the Stream object may contain many Trace
    objects if ``details=True`` is used.

    >>> print(len(st))
    101
    """
    # Parse the headonly and reclen flags.
    if headonly is True:
        unpack_data = 0
    else:
        unpack_data = 1
    if reclen is None:
        reclen = -1
    elif reclen not in VALID_RECORD_LENGTHS:
        msg = 'Invalid record length. Autodetection will be used.'
        warnings.warn(msg)
        reclen = -1

    # Determine the byte order.
    if header_byteorder == "=":
        header_byteorder = NATIVE_BYTEORDER

    if header_byteorder is None:
        header_byteorder = -1
    elif header_byteorder in [0, "0", "<"]:
        header_byteorder = 0
    elif header_byteorder in [1, "1", ">"]:
        header_byteorder = 1

    # Parse some information about the file.
    if header_byteorder == 0:
        bo = "<"
    elif header_byteorder > 0:
        bo = ">"
    else:
        bo = None

    # Determine the total size. Either it's a file-like object.
    if hasattr(mseed_object, "tell") and hasattr(mseed_object, "seek"):
        cur_pos = mseed_object.tell()
        mseed_object.seek(0, 2)
        length = mseed_object.tell() - cur_pos
        mseed_object.seek(cur_pos, 0)
    # Or a file name.
    else:
        length = os.path.getsize(mseed_object)

    if length < 128:
        msg = "The smallest possible mini-SEED record is made up of 128 " \
              "bytes. The passed buffer or file contains only %i." % length
        raise ObsPyMSEEDFilesizeTooSmallError(msg)
    elif length > 2 ** 31:
        msg = ("ObsPy can currently not directly read mini-SEED files that "
               "are larger than 2^31 bytes (2048 MiB). To still read it, "
               "please read the file in chunks as documented here: "
               "https://github.com/obspy/obspy/pull/1419"
               "#issuecomment-221582369")
        raise ObsPyMSEEDFilesizeTooLargeError(msg)

    info = util.get_record_information(mseed_object, endian=bo)

    # Map the encoding to a readable string value.
    if "encoding" not in info:
        # Hopefully detected by libmseed.
        info["encoding"] = None
    elif info["encoding"] in ENCODINGS:
        info['encoding'] = ENCODINGS[info['encoding']][0]
    elif info["encoding"] in UNSUPPORTED_ENCODINGS:
        msg = ("Encoding '%s' (%i) is not supported by ObsPy. Please send "
               "the file to the ObsPy developers so that we can add "
               "support for it.") % \
            (UNSUPPORTED_ENCODINGS[info['encoding']], info['encoding'])
        raise ValueError(msg)
    else:
        msg = "Encoding '%i' is not a valid MiniSEED encoding." % \
            info['encoding']
        raise ValueError(msg)

    record_length = info["record_length"]

    # Only keep information relevant for the whole file.
    info = {'filesize': info['filesize']}

    # If it's a file name just read it.
    if isinstance(mseed_object, (str, native_str)):
        # Read to NumPy array which is used as a buffer.
        bfr_np = np.fromfile(mseed_object, dtype=np.int8)
    elif hasattr(mseed_object, 'read'):
        bfr_np = from_buffer(mseed_object.read(), dtype=np.int8)

    # Search for data records and pass only the data part to the underlying C
    # routine.
    offset = 0
    # 0 to 9 are defined in a row in the ASCII charset.
    min_ascii = ord('0')

    # Small function to check whether an array of ASCII values contains only
    # digits.
    def isdigit(x):
        # Every byte must fall within ord('0')..ord('9'); checking only the
        # maximum would let bytes below ord('0') slip through.
        return bool(((x - min_ascii >= 0) & (x - min_ascii <= 9)).all())

    while True:
        # This should never happen
        if (isdigit(bfr_np[offset:offset + 6]) is False) or \
                (bfr_np[offset + 6] not in VALID_CONTROL_HEADERS):
            msg = 'Not a valid (Mini-)SEED file'
            raise Exception(msg)
        elif bfr_np[offset + 6] in SEED_CONTROL_HEADERS:
            offset += record_length
            continue
        break
    bfr_np = bfr_np[offset:]
    buflen = len(bfr_np)

    # If no selection is given pass None to the C function.
    if starttime is None and endtime is None and sourcename is None:
        selections = None
    else:
        select_time = SelectTime()
        selections = Selections()
        selections.timewindows.contents = select_time
        if starttime is not None:
            if not isinstance(starttime, UTCDateTime):
                msg = 'starttime needs to be a UTCDateTime object'
                raise ValueError(msg)
            selections.timewindows.contents.starttime = \
                util._convert_datetime_to_mstime(starttime)
        else:
            # HPTERROR results in no starttime.
            selections.timewindows.contents.starttime = HPTERROR
        if endtime is not None:
            if not isinstance(endtime, UTCDateTime):
                msg = 'endtime needs to be a UTCDateTime object'
                raise ValueError(msg)
            selections.timewindows.contents.endtime = \
                util._convert_datetime_to_mstime(endtime)
        else:
            # HPTERROR results in no endtime.
            selections.timewindows.contents.endtime = HPTERROR
        if sourcename is not None:
            if not isinstance(sourcename, (str, native_str)):
                msg = 'sourcename needs to be a string'
                raise ValueError(msg)
            # libmseed uses underscores as separators and allows filtering
            # by data quality; that is disabled here to avoid confusing
            # users (* == all data qualities).
            selections.srcname = (sourcename.replace('.', '_') + '_*').\
                encode('ascii', 'ignore')
        else:
            selections.srcname = b'*'
    all_data = []

    # Use a callback function to allocate the memory and keep track of the
    # data.
    def allocate_data(samplecount, sampletype):
        # Enhanced sanity checking for libmseed 2.10 can result in the
        # sampletype not being set. Just return an empty array in this case.
        if sampletype == b"\x00":
            data = np.empty(0)
        else:
            data = np.empty(samplecount, dtype=DATATYPES[sampletype])
        all_data.append(data)
        return data.ctypes.data
    # XXX: Do this properly!
    # Define Python callback function for use in C function. Return a long so
    # it hopefully works on 32 and 64 bit systems.
    alloc_data = C.CFUNCTYPE(C.c_longlong, C.c_int, C.c_char)(allocate_data)

    try:
        verbose = int(verbose)
    except Exception:
        verbose = 0

    clibmseed.verbose = bool(verbose)
    try:
        lil = clibmseed.readMSEEDBuffer(
            bfr_np, buflen, selections, C.c_int8(unpack_data),
            reclen, C.c_int8(verbose), C.c_int8(details), header_byteorder,
            alloc_data)
    except InternalMSEEDError as e:
        msg = e.args[0]
        # Append the offset of the full SEED header if necessary. That way
        # the C code does not have to deal with it.
        if offset and "offset" in msg:
            msg = ("%s\nThe file contains a %i byte dataless part at the "
                   "beginning. Make sure to add that to the reported "
                   "offset to get the actual location in the file." % (
                       msg, offset))
            raise InternalMSEEDError(msg)
        else:
            raise
    finally:
        # Make sure to reset the verbosity.
        clibmseed.verbose = True

    del selections

    traces = []
    try:
        current_id = lil.contents
    # Return an empty stream if no traces are found.
    except ValueError:
        clibmseed.lil_free(lil)
        del lil
        return Stream()

    while True:
        # Init header with the essential information.
        header = {'network': current_id.network.strip(),
                  'station': current_id.station.strip(),
                  'location': current_id.location.strip(),
                  'channel': current_id.channel.strip(),
                  'mseed': {'dataquality': current_id.dataquality}}
        # Loop over segments.
        try:
            current_segment = current_id.firstSegment.contents
        except ValueError:
            break
        while True:
            header['sampling_rate'] = current_segment.samprate
            header['starttime'] = \
                util._convert_mstime_to_datetime(current_segment.starttime)
            header['mseed']['number_of_records'] = current_segment.recordcnt
            header['mseed']['encoding'] = \
                ENCODINGS[current_segment.encoding][0]
            header['mseed']['byteorder'] = \
                "<" if current_segment.byteorder == 0 else ">"
            header['mseed']['record_length'] = current_segment.reclen
            if details:
                timing_quality = current_segment.timing_quality
                if timing_quality == 0xFF:  # 0xFF marks unknown timing quality
                    timing_quality = False
                header['mseed']['blkt1001'] = {}
                header['mseed']['blkt1001']['timing_quality'] = timing_quality
                header['mseed']['calibration_type'] = \
                    current_segment.calibration_type \
                    if current_segment.calibration_type != -1 else False

            if headonly is False:
                # The data will always be in sequential order.
                data = all_data.pop(0)
                header['npts'] = len(data)
            else:
                data = np.array([])
                header['npts'] = current_segment.samplecnt
            # Py3k: convert byte-string header values to unicode
            header['mseed'] = dict((k, v.decode())
                                   if isinstance(v, bytes) else (k, v)
                                   for k, v in header['mseed'].items())
            header = dict((k, util._decode_header_field(k, v))
                          if isinstance(v, bytes) else (k, v)
                          for k, v in header.items())
            trace = Trace(header=header, data=data)
            # Append global information.
            for key, value in info.items():
                setattr(trace.stats.mseed, key, value)
            traces.append(trace)
            # A Null pointer access results in a ValueError
            try:
                current_segment = current_segment.next.contents
            except ValueError:
                break
        try:
            current_id = current_id.next.contents
        except ValueError:
            break

    clibmseed.lil_free(lil)  # NOQA
    del lil  # NOQA
    return Stream(traces=traces)
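
Since _read_mseed registers as a plugin, typical access goes through obspy.read. A minimal sketch with a hypothetical file name, using only keyword arguments documented above:

from obspy import UTCDateTime, read

# 'example.mseed' is a hypothetical file.
st = read("example.mseed", details=True,
          starttime=UTCDateTime("2010-01-01"),
          sourcename="*.?HZ", header_byteorder=">")
for tr in st:
    print(tr.id, tr.stats.mseed.record_length,
          tr.stats.mseed.byteorder, tr.stats.mseed.encoding)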
Example #49
0
def _read_seisan(filename, headonly=False, **kwargs):  # @UnusedVariable
    """
    Reads a SEISAN file and returns an ObsPy Stream object.

    .. warning::
        This function should NOT be called directly, it registers via the
        ObsPy :func:`~obspy.core.stream.read` function, call this instead.

    :type filename: str
    :param filename: SEISAN file to be read.
    :rtype: :class:`~obspy.core.stream.Stream`
    :return: An ObsPy Stream object.

    .. rubric:: Example

    >>> from obspy import read
    >>> st = read("/path/to/2001-01-13-1742-24S.KONO__004")
    >>> st  # doctest: +ELLIPSIS
    <obspy.core.stream.Stream object at 0x...>
    >>> print(st)  # doctest: +ELLIPSIS
    4 Trace(s) in Stream:
    .KONO.0.B0Z | 2001-01-13T17:45:01.999000Z - ... | 20.0 Hz, 6000 samples
    .KONO.0.L0Z | 2001-01-13T17:42:24.924000Z - ... | 1.0 Hz, 3542 samples
    .KONO.0.L0N | 2001-01-13T17:42:24.924000Z - ... | 1.0 Hz, 3542 samples
    .KONO.0.L0E | 2001-01-13T17:42:24.924000Z - ... | 1.0 Hz, 3542 samples
    """
    def _readline(fh, length=80):
        # Each line is a Fortran unformatted record: 4-byte length markers
        # surround the 80-byte payload.
        data = fh.read(length + 8)
        end = length + 4
        start = 4
        return data[start:end]
    # read data chunk from given file
    fh = open(filename, 'rb')
    data = fh.read(80 * 12)
    # get version info from file
    (byteorder, arch, _version) = _get_version(data)
    # fetch lines
    fh.seek(0)
    # start with event file header
    # line 1
    data = _readline(fh)
    number_of_channels = int(data[30:33])
    # calculate number of lines with channels
    number_of_lines = number_of_channels // 3 + (number_of_channels % 3 and 1)
    if number_of_lines < 10:
        number_of_lines = 10
    # line 2
    data = _readline(fh)
    # line 3
    for _i in range(0, number_of_lines):
        data = _readline(fh)
    # now parse each event file channel header + data
    stream = Stream()
    dlen = arch // 8
    dtype = np.dtype(native_str(byteorder + 'i' + str(dlen)))
    stype = native_str('=i' + str(dlen))
    for _i in range(number_of_channels):
        # get channel header
        temp = _readline(fh, 1040).decode()
        # create Stats
        header = Stats()
        header['network'] = (temp[16] + temp[19]).strip()
        header['station'] = temp[0:5].strip()
        header['location'] = (temp[7] + temp[12]).strip()
        header['channel'] = (temp[5:7] + temp[8]).strip()
        header['sampling_rate'] = float(temp[36:43])
        header['npts'] = int(temp[43:50])
        # create start and end times
        year = int(temp[9:12]) + 1900
        month = int(temp[17:19])
        day = int(temp[20:22])
        hour = int(temp[23:25])
        mins = int(temp[26:28])
        secs = float(temp[29:35])
        header['starttime'] = UTCDateTime(year, month, day, hour, mins) + secs
        if headonly:
            # skip data
            fh.seek(dlen * (header['npts'] + 2), 1)
            stream.append(Trace(header=header))
        else:
            # fetch data
            data = from_buffer(
                fh.read((header['npts'] + 2) * dtype.itemsize),
                dtype=dtype)
            # convert to system byte order
            data = np.require(data, stype)
            nbytes = (data.size - 2) * dtype.itemsize
            if nbytes != data[0] or nbytes != data[-1]:
                msg = "Mismatching byte size %d, %d, %d"
                warnings.warn(msg % (nbytes, data[0], data[-1]))
            stream.append(Trace(data=data[1:-1], header=header))
    fh.close()
    return stream
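
A minimal sketch of the record-marker check in the reader above: SEISAN data blocks are Fortran unformatted records whose first and last words hold the payload byte count. Synthetic little-endian data assumed:

import numpy as np

payload = np.arange(10, dtype='<i4')
nbytes = payload.size * payload.itemsize
record = np.concatenate(([nbytes], payload, [nbytes])).astype('<i4')

# The same consistency test as in _read_seisan above.
data = record
assert (data.size - 2) * data.itemsize == data[0] == data[-1]
samples = data[1:-1]    # strip the two marker words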
Example #50
0
def bcd_hex(_i):
    # Decode an array of BCD-packed bytes: hex-encoding the raw bytes
    # recovers the digit string, one fixed-width entry per input row.
    m = _i.shape[1]
    _bcd = codecs.encode(_i.ravel(), "hex_codec").decode("ASCII").upper()
    return from_buffer(_bcd, dtype="|S%d" % (m * 2))
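
A short sketch of the BCD decoding above: each byte packs two decimal digits, so hex-encoding the raw bytes recovers the digit string, one fixed-width entry per input row. Synthetic input assumed:

import numpy as np

packed = np.array([[0x20, 0x21],
                   [0x12, 0x31]], dtype=np.uint8)
print(bcd_hex(packed))    # [b'2021' b'1231']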
Example #51
0
    def parse_next_trace(self):
        """
        Parse the next trace in the trace pointer list and return a Trace
        object.
        """
        trace_descriptor_block = self.file_pointer.read(32)
        # Check if the trace descriptor block id is valid.
        if unpack(self.endian + b'H', trace_descriptor_block[0:2])[0] != \
           0x4422:
            msg = 'Invalid trace descriptor block id.'
            raise SEG2InvalidFileError(msg)
        size_of_this_block, = unpack_from(self.endian + b'H',
                                          trace_descriptor_block, 2)
        number_of_samples_in_data_block, = \
            unpack_from(self.endian + b'L', trace_descriptor_block, 8)
        data_format_code, = unpack_from(b'B', trace_descriptor_block, 12)

        # Parse the data format code.
        if data_format_code == 4:
            dtype = self.endian + b'f4'
            sample_size = 4
        elif data_format_code == 5:
            dtype = self.endian + b'f8'
            sample_size = 8
        elif data_format_code == 1:
            dtype = self.endian + b'i2'
            sample_size = 2
        elif data_format_code == 2:
            dtype = self.endian + b'i4'
            sample_size = 4
        elif data_format_code == 3:
            dtype = self.endian + b'i2'
            sample_size = 2.5
            if number_of_samples_in_data_block % 4 != 0:
                raise SEG2InvalidFileError(
                    'Data format code 3 requires that the number of samples '
                    'is divisible by 4, but sample count is %d' %
                    (number_of_samples_in_data_block, ))
        else:
            msg = 'Unrecognized data format code: %d' % data_format_code
            raise SEG2InvalidFileError(msg)

        # The rest of the trace block is free form.
        header = {}
        header['seg2'] = AttribDict()
        self.parse_free_form(self.file_pointer.read(size_of_this_block - 32),
                             header['seg2'])
        header['delta'] = float(header['seg2']['SAMPLE_INTERVAL'])
        # Set to the file's start time.
        header['starttime'] = deepcopy(self.starttime)
        if 'DELAY' in header['seg2']:
            if float(header['seg2']['DELAY']) != 0:
                msg = "Non-zero value found in Trace's 'DELAY' field. " + \
                      "This is not supported/tested yet and might lead " + \
                      "to a wrong starttime of the Trace. Please contact " + \
                      "the ObsPy developers with a sample file."
                warnings.warn(msg)

        if "DESCALING_FACTOR" in header["seg2"]:
            header['calib'] = float(header['seg2']['DESCALING_FACTOR'])

        # Unpack the data.
        data = from_buffer(self.file_pointer.read(
            int(number_of_samples_in_data_block * sample_size)),
                           dtype=dtype)
        if data_format_code == 3:
            # Convert one's complement to two's complement by adding one to
            # negative numbers.
            one_to_two = (data < 0)
            # The first two bytes (1 word) of every 10 bytes (5 words) contains
            # a 4-bit exponent for each of the 4 remaining 2-byte (int16)
            # samples.
            exponents = data[0::5].view(self.endian + b'u2')
            result = np.empty(number_of_samples_in_data_block, dtype=np.int32)
            # Apply the negative correction, then multiply by correct exponent.
            result[0::4] = ((data[1::5] + one_to_two[1::5]) *
                            2**((exponents & 0x000f) >> 0))
            result[1::4] = ((data[2::5] + one_to_two[2::5]) *
                            2**((exponents & 0x00f0) >> 4))
            result[2::4] = ((data[3::5] + one_to_two[3::5]) *
                            2**((exponents & 0x0f00) >> 8))
            result[3::4] = ((data[4::5] + one_to_two[4::5]) *
                            2**((exponents & 0xf000) >> 12))
            data = result

        # Integrate SEG2 file header into each trace header
        tmp = self.stream.stats.seg2.copy()
        tmp.update(header['seg2'])
        header['seg2'] = tmp
        return Trace(data=data, header=header)
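
A worked sketch of the data format code 3 branch above: every group of five int16 words holds four 4-bit exponents followed by four one's-complement mantissas. Fully synthetic little-endian values assumed:

import numpy as np

exps = (3 << 0) | (1 << 4) | (0 << 8) | (2 << 12)    # exponents 3, 1, 0, 2
mantissas = [5, -7, 100, -1]                         # desired sample bases
# Encode back to one's complement (subtract one from negative values).
words = np.array([exps] + [m - (m < 0) for m in mantissas], dtype='<i2')

one_to_two = (words < 0)
exponents = words[0::5].view('<u2')
out = np.empty(4, dtype=np.int32)
out[0] = (words[1] + one_to_two[1]) * 2**((exponents[0] & 0x000f) >> 0)
out[1] = (words[2] + one_to_two[2]) * 2**((exponents[0] & 0x00f0) >> 4)
out[2] = (words[3] + one_to_two[3]) * 2**((exponents[0] & 0x0f00) >> 8)
out[3] = (words[4] + one_to_two[4]) * 2**((exponents[0] & 0xf000) >> 12)
print(out)    # [ 40 -14 100  -4]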
Example #52
0
def _read_y(filename, headonly=False, **kwargs):  # @UnusedVariable
    """
    Reads a Nanometrics Y file and returns an ObsPy Stream object.

    .. warning::
        This function should NOT be called directly, it registers via the
        ObsPy :func:`~obspy.core.stream.read` function, call this instead.

    :type filename: str
    :param filename: Nanometrics Y file to be read.
    :type headonly: bool, optional
    :param headonly: If set to True, read only the header. This is most
        useful for scanning available data in huge (temporary) data sets.
    :rtype: :class:`~obspy.core.stream.Stream`
    :return: An ObsPy Stream object.

    .. rubric:: Example

    >>> from obspy import read
    >>> st = read("/path/to/YAYT_BHZ_20021223.124800")
    >>> st  # doctest: +ELLIPSIS
    <obspy.core.stream.Stream object at 0x...>
    >>> print(st)  # doctest: +ELLIPSIS
    1 Trace(s) in Stream:
    .AYT..BHZ | 2002-12-23T12:48:00.000100Z - ... | 100.0 Hz, 18000 samples
    """
    # The first tag in a Y-file must be the TAG_Y_FILE (0) tag. This must be
    # followed by the following tags, in any order:
    #   TAG_STATION_INFO (1)
    #   TAG_STATION_LOCATION (2)
    #   TAG_STATION_PARAMETERS (3)
    #   TAG_STATION_DATABASE (4)
    #   TAG_SERIES_INFO (5)
    #   TAG_SERIES_DATABASE (6)
    # The following tag is optional:
    #   TAG_STATION_RESPONSE (26)
    # The last tag in the file must be a TAG_DATA_INT32 (7) tag. This tag must
    # be followed by an array of LONG's. The number of entries in the array
    # must agree with what was described in the TAG_SERIES_INFO data.
    with open(filename, "rb") as fh:
        trace = Trace()
        trace.stats.y = AttribDict()
        count = -1
        while True:
            endian, tag_type, next_tag, _next_same = __parse_tag(fh)
            if tag_type == 1:
                # TAG_STATION_INFO
                # UCHAR Update[8]
                #   This field is only used internally for administrative
                #   purposes.  It should always be set to zeroes.
                # UCHAR Station[5] (BLANKPAD)
                #   Station is the five letter SEED format station
                #   identification.
                # UCHAR Location[2] (BLANKPAD)
                #   Location Location is the two letter SEED format location
                #   identification.
                # UCHAR Channel[3] (BLANKPAD)
                #   Channel Channel is the three letter SEED format channel
                #   identification.
                # UCHAR NetworkID[51] (ASCIIZ)
                #   This is some descriptive text identifying the network.
                # UCHAR SiteName[61] (ASCIIZ)
                #   SiteName is some text identifying the site.
                # UCHAR Comment[31] (ASCIIZ)
                #   Comment is any comment for this station.
                # UCHAR SensorType[51] (ASCIIZ)
                #   SensorType is some text describing the type of sensor used
                #   at the station.
                # UCHAR DataFormat[7] (ASCIIZ)
                #   DataFormat is some text describing the data format recorded
                #   at the station.
                data = fh.read(next_tag)
                parts = _unpack_with_asciiz_and_decode(b"5s2s3s51z61z31z51z7z", data[8:])
                trace.stats.station = parts[0]
                trace.stats.location = parts[1]
                trace.stats.channel = parts[2]
                # extra
                params = AttribDict()
                params.network_id = parts[3]
                params.site_name = parts[4]
                params.comment = parts[5]
                params.sensor_type = parts[6]
                params.data_format = parts[7]
                trace.stats.y.tag_station_info = params
            elif tag_type == 2:
                # TAG_STATION_LOCATION
                # UCHAR Update[8]
                #   This field is only used internally for administrative
                #   purposes.  It should always be set to zeroes.
                # FLOAT Latitude
                #   Latitude in degrees of the location of the station. The
                #   latitude should be between -90 (South) and +90 (North).
                # FLOAT Longitude
                #   Longitude in degrees of the location of the station. The
                #   longitude should be between -180 (West) and +180 (East).
                # FLOAT Elevation
                #   Elevation in meters above sea level of the station.
                # FLOAT Depth
                #   Depth is the depth in meters of the sensor.
                # FLOAT Azimuth
                #   Azimuth of the sensor in degrees clockwise.
                # FLOAT Dip
                #   Dip is the dip of the sensor. 90 degrees is defined as
                #   vertical right way up.
                data = fh.read(next_tag)
                parts = _unpack_with_asciiz_and_decode(endian + b"ffffff", data[8:])
                params = AttribDict()
                params.latitude = parts[0]
                params.longitude = parts[1]
                params.elevation = parts[2]
                params.depth = parts[3]
                params.azimuth = parts[4]
                params.dip = parts[5]
                trace.stats.y.tag_station_location = params
            elif tag_type == 3:
                # TAG_STATION_PARAMETERS
                # UCHAR Update[16]
                #   This field is only used internally for administrative
                #   purposes.  It should always be set to zeroes.
                # REALTIME StartValidTime
                #   Time that the information in these records became valid.
                # REALTIME EndValidTime
                #   Time that the information in these records became invalid.
                # FLOAT Sensitivity
                #   Sensitivity of the sensor in nanometers per bit.
                # FLOAT SensFreq
                #   Frequency at which the sensitivity was measured.
                # FLOAT SampleRate
                #   This is the number of samples per second. This value can be
                #   less than 1.0. (i.e. 0.1)
                # FLOAT MaxClkDrift
                #   Maximum drift rate of the clock in seconds per sample.
                # UCHAR SensUnits[24] (ASCIIZ)
                #   Some text indicating the units in which the sensitivity was
                #   measured.
                # UCHAR CalibUnits[24] (ASCIIZ)
                #   Some text indicating the units in which calibration input
                #   was measured.
                # UCHAR ChanFlags[27] (BLANKPAD)
                #   Text indicating the channel flags according to the SEED
                #   definition.
                # UCHAR UpdateFlag
                #   This flag must be “N” or “U” according to the SEED
                #   definition.
                # UCHAR Filler[4]
                #   Filler Pads out the record to satisfy the alignment
                #   restrictions for reading data on a SPARC processor.
                data = fh.read(next_tag)
                parts = _unpack_with_asciiz_and_decode(endian + b"ddffff24z24z27sc4s", data[16:])
                trace.stats.sampling_rate = parts[4]
                # extra
                params = AttribDict()
                params.start_valid_time = parts[0]
                params.end_valid_time = parts[1]
                params.sensitivity = parts[2]
                params.sens_freq = parts[3]
                params.sample_rate = parts[4]
                params.max_clk_drift = parts[5]
                params.sens_units = parts[6]
                params.calib_units = parts[7]
                params.chan_flags = parts[8]
                params.update_flag = parts[9]
                trace.stats.y.tag_station_parameters = params
            elif tag_type == 4:
                # TAG_STATION_DATABASE
                # UCHAR Update[8]
                #   This field is only used internally for administrative
                #   purposes.  It should always be set to zeroes.
                # REALTIME LoadDate
                #   Date the information was loaded into the database.
                # UCHAR Key[16]
                #   Unique key that identifies this record in the database.
                data = fh.read(next_tag)
                parts = _unpack_with_asciiz_and_decode(endian + b"d16s", data[8:])
                params = AttribDict()
                params.load_date = parts[0]
                params.key = parts[1]
                trace.stats.y.tag_station_database = params
            elif tag_type == 5:
                # TAG_SERIES_INFO
                # UCHAR Update[16]
                #   This field is only used internally for administrative
                #   purposes.  It should always be set to zeroes.
                # REALTIME StartTime
                #   This is start time of the data in this series.
                # REALTIME EndTime
                #   This is end time of the data in this series.
                # ULONG NumSamples
                #   This is the number of samples of data in this series.
                # LONG DCOffset
                #   DCOffset is the DC offset of the data.
                # LONG MaxAmplitude
                #   MaxAmplitude is the maximum amplitude of the data.
                # LONG MinAmplitude
                #   MinAmplitude is the minimum amplitude of the data.
                # UCHAR Format[8] (ASCIIZ)
                #   This is the format of the data. This should always be
                #   “YFILE”.
                # UCHAR FormatVersion[8] (ASCIIZ)
                #   FormatVersion is the version of the format of the data.
                #   This should always be “5.0”
                data = fh.read(next_tag)
                parts = _unpack_with_asciiz_and_decode(endian + b"ddLlll8z8z", data[16:])
                trace.stats.starttime = UTCDateTime(parts[0])
                count = parts[2]
                # extra
                params = AttribDict()
                params.endtime = UTCDateTime(parts[1])
                params.num_samples = parts[2]
                params.dc_offset = parts[3]
                params.max_amplitude = parts[4]
                params.min_amplitude = parts[5]
                params.format = parts[6]
                params.format_version = parts[7]
                trace.stats.y.tag_series_info = params
            elif tag_type == 6:
                # TAG_SERIES_DATABASE
                # UCHAR Update[8]
                #   This field is only used internally for administrative
                #   purposes.  It should always be set to zeroes.
                # REALTIME LoadDate
                #   Date the information was loaded into the database.
                # UCHAR Key[16]
                #   Unique key that identifies this record in the database.
                data = fh.read(next_tag)
                parts = _unpack_with_asciiz_and_decode(endian + b"d16s", data[8:])
                params = AttribDict()
                params.load_date = parts[0]
                params.key = parts[1]
                trace.stats.y.tag_series_database = params
            elif tag_type == 26:
                # TAG_STATION_RESPONSE
                # UCHAR Update[8]
                #   This field is only used internally for administrative
                #   purposes.  It should always be set to zeroes.
                # UCHAR PathName[260]
                #  PathName is the full name of the file which contains the
                #  response information for this station.
                data = fh.read(next_tag)
                parts = _unpack_with_asciiz_and_decode(b"260s", data[8:])
                params = AttribDict()
                params.path_name = parts[0]
                trace.stats.y.tag_station_response = params
            elif tag_type == 7:
                # TAG_DATA_INT32
                trace.data = from_buffer(fh.read(np.dtype(np.int32).itemsize * count), dtype=np.int32)
                # break loop as TAG_DATA_INT32 should be the last tag in file
                break
            else:
                fh.seek(next_tag, 1)
    return Stream([trace])
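
_read_y also registers as a plugin, so access normally goes through obspy.read; a minimal sketch with a hypothetical file name (the format key "Y" is assumed):

from obspy import read

# 'example.y' is a hypothetical Nanometrics Y file.
st = read("example.y", format="Y", headonly=True)
tr = st[0]
print(tr.id, tr.stats.npts, tr.stats.sampling_rate)
print(tr.stats.y.tag_series_info.format)    # should read 'YFILE'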
Example #53
0
def _read_win(filename, century="20", **kwargs):  # @UnusedVariable
    """
    Reads a WIN file and returns a Stream object.

    .. warning::
        This function should NOT be called directly, it registers via the
        ObsPy :func:`~obspy.core.stream.read` function, call this instead.

    :type filename: str
    :param filename: WIN file to be read.
    :type century: str
    :param century: WIN stores the year as a two-digit number; the century
        is needed to construct a proper datetime.
    :rtype: :class:`~obspy.core.stream.Stream`
    :returns: Stream object containing header and data.
    """
    output = {}
    srates = {}

    # read win file
    with open(filename, "rb") as fpin:
        fpin.seek(0, 2)
        sz = fpin.tell()
        fpin.seek(0)
        leng = 0
        status0 = 0
        start = 0
        while leng < sz:
            pklen = fpin.read(4)
            if len(pklen) < 4:
                break
            leng = 4
            truelen = from_buffer(pklen, native_str('>i'))[0]
            if truelen == 0:
                break
            buff = fpin.read(6)
            leng += 6

            yy = "%s%02x" % (century, ord(buff[0:1]))
            mm = "%x" % ord(buff[1:2])
            dd = "%x" % ord(buff[2:3])
            hh = "%x" % ord(buff[3:4])
            mi = "%x" % ord(buff[4:5])
            sec = "%x" % ord(buff[5:6])

            date = UTCDateTime(int(yy), int(mm), int(dd), int(hh), int(mi),
                               int(sec))
            if start == 0:
                start = date
            if status0 == 0:
                sdata = None
            while leng < truelen:
                buff = fpin.read(4)
                leng += 4
                flag = '%02x' % ord(buff[0:1])
                chanum = '%02x' % ord(buff[1:2])
                chanum = "%02s%02s" % (flag, chanum)
                datawide = int('%x' % (ord(buff[2:3]) >> 4))
                srate = ord(buff[3:4])
                xlen = (srate - 1) * datawide
                if datawide == 0:
                    xlen = srate // 2
                    datawide = 0.5

                idata00 = fpin.read(4)
                leng += 4
                idata22 = from_buffer(idata00, native_str('>i'))[0]

                if chanum in output:
                    output[chanum].append(idata22)
                else:
                    output[chanum] = [idata22, ]
                    srates[chanum] = srate
                sdata = fpin.read(xlen)
                leng += xlen

                if len(sdata) < xlen:
                    fpin.seek(-(xlen - len(sdata)), 1)
                    sdata += fpin.read(xlen - len(sdata))
                    msg = "This shouldn't happen, it's weird..."
                    warnings.warn(msg)

                if datawide == 0.5:
                    for i in range(xlen):
                        byte = from_buffer(sdata[i:i + 1], np.int8)[0]
                        # High nibble: the arithmetic shift sign-extends the
                        # signed 4-bit delta. The parentheses matter: '+'
                        # binds tighter than '>>'.
                        idata2 = output[chanum][-1] + (byte >> 4)
                        output[chanum].append(idata2)
                        # Low nibble: the int8 left shift wraps, the right
                        # shift then sign-extends.
                        idata2 = idata2 + ((byte << 4) >> 4)
                        output[chanum].append(idata2)
                elif datawide == 1:
                    for i in range((xlen // datawide)):
                        idata2 = output[chanum][-1] +\
                            from_buffer(sdata[i:i + 1], np.int8)[0]
                        output[chanum].append(idata2)
                elif datawide == 2:
                    for i in range((xlen // datawide)):
                        idata2 = output[chanum][-1] +\
                            from_buffer(sdata[2 * i:2 * (i + 1)],
                                        native_str('>h'))[0]
                        output[chanum].append(idata2)
                elif datawide == 3:
                    for i in range((xlen // datawide)):
                        # Pad to 4 bytes, then shift out the pad byte; the
                        # parentheses keep the shift off the running sum.
                        idata2 = output[chanum][-1] +\
                            (from_buffer(sdata[3 * i:3 * (i + 1)] + b' ',
                                         native_str('>i'))[0] >> 8)
                        output[chanum].append(idata2)
                elif datawide == 4:
                    for i in range((xlen // datawide)):
                        idata2 = output[chanum][-1] +\
                            from_buffer(sdata[4 * i:4 * (i + 1)],
                                        native_str('>i'))[0]
                        output[chanum].append(idata2)
                else:
                    msg = "DATAWIDE is %s " % datawide + \
                          "but only values of 0.5, 1, 2, 3 or 4 are supported."
                    raise NotImplementedError(msg)

    traces = []
    for i in output.keys():
        t = Trace(data=np.array(output[i]))
        t.stats.channel = str(i)
        t.stats.sampling_rate = float(srates[i])
        t.stats.starttime = start
        traces.append(t)
    return Stream(traces=traces)
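
A short sketch of the 4-bit delta decoding above (datawide == 0.5): each byte carries two signed nibble deltas, high nibble first, and the int8 shifts provide the sign extension. Synthetic input assumed:

import numpy as np

last = 100
byte = np.frombuffer(b'\x2f', np.int8)[0]    # deltas +2 and -1
first = last + (byte >> 4)                   # 100 + 2 = 102
second = first + ((byte << 4) >> 4)          # 102 - 1 = 101
print(first, second)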