Exemplo n.º 1
0
    def process(self, event):
        """
        Run the configured command for one event, then pass the event on.

        The event must be a dict.  Its entries are layered on top of the
        static variable bindings, the resulting bindings are turned into a
        command sequence, and that command is handed to process.subprocess.
        The captured stdout, stderr and command line are stored on the
        instance, and the original event is forwarded with self.send().

        When self.notify_stream is truthy, a timestamped line is written to
        it before the command starts and again after it finishes.

        Raises ValueError if event is not a dict.
        """
        if not isinstance(event, dict):
            raise ValueError("Expected event of type dict, got one of type %s" % (type(event),))

        # Begin with the static bindings and let the event's entries win for
        # any key present in both.
        merged = {}
        merged.update(self._static_variable_dict)
        merged.update(event)

        argv = self._make_command_sequence(merged)

        def announce(label):
            # Write one timestamped progress line, but only when a
            # notification stream has been configured.
            if self.notify_stream:
                line = '[' + timestamp.get_timestamp() + label + ' '.join(argv) + '\n'
                self.notify_stream.write(line)

        announce('] Starting: ')

        # Per the original author's note, this call is expected to raise with
        # a useful message if the command cannot be run or runs and fails.
        out, err, cmdline = process.subprocess(argv)
        self._last_stdout_output = out
        self._last_stderr_output = err
        self._last_command_line = cmdline

        # Reaching this point means the subprocess call returned normally.
        announce('] Finished: ')
        self.send(event)
Exemplo n.º 2
0
    def process(self, event):
        """
        Execute the element's command for a single event and forward it.

        event must be a dict; anything else raises ValueError.  The event's
        key/value pairs override the static variable dict when the command
        sequence is built.  The stdout, stderr and command line returned by
        process.subprocess are stored on the instance, and the unchanged
        event is then sent downstream via self.send().

        If self.notify_stream is truthy, a '[timestamp] Starting: ...' line
        is written before the command runs and a '[timestamp] Finished: ...'
        line after it returns.
        """
        if not isinstance(event, dict):
            message = "Expected event of type dict, got one of type %s" % (
                type(event), )
            raise ValueError(message)

        # Copy the static dict first so that updating with the event lets
        # the event's bindings take precedence without touching the static
        # dict itself.
        variables = dict(self._static_variable_dict)
        variables.update(event)
        command = self._make_command_sequence(variables)

        if self.notify_stream:
            started = ''.join(['[', timestamp.get_timestamp(), '] Starting: ',
                               ' '.join(command), '\n'])
            self.notify_stream.write(started)

        # The original author notes that this raises with a useful error
        # message if the command cannot be run or if it runs and fails.
        results = process.subprocess(command)
        (self._last_stdout_output, self._last_stderr_output,
         self._last_command_line) = results

        # Only reached when the subprocess call returned without raising.
        if self.notify_stream:
            finished = ''.join(['[', timestamp.get_timestamp(),
                                '] Finished: ', ' '.join(command), '\n'])
            self.notify_stream.write(finished)
        self.send(event)
Exemplo n.º 3
0
def _get_sphere_fd(fileno, file_num_bytes, filename, wave_format,
                   filename_abs):
    """
    Low-level reading of NIST Sphere audio data.

    The NIST_1A text header is read from the descriptor fileno via
    read_fd_strict and parsed into typed fields.  When wave_format is not
    None, the samples are decoded by running the external 'sph2pipe'
    command on filename_abs and converted to the requested format.

    Arguments:
      fileno          descriptor from which the header is read
      file_num_bytes  accepted but not used by this function
      filename        accepted but not used by this function
      wave_format     None to skip decoding, otherwise a key of
                      wave_c_formats / wave_numpy_formats
      filename_abs    path given to sph2pipe and used in error messages

    Returns a (file_info, audio_info, wave) triple.  file_info and
    audio_info are attrdicts; wave is a numpy array of shape
    (num_channels, num_samples), or None when wave_format is None.

    Raises AudioTypeError if the header lacks the NIST_1A marker, contains
    a malformed line or an unhandled field type, is missing a required
    field, or declares sample_n_bytes > 2.  Raises onyx.ExternalError if
    sph2pipe writes anything to stderr, and onyx.DataFormatError if it
    returns an unexpected number of bytes.

    >>> module_dir, module_name = os.path.split(__file__)
    >>> zero_sph = os.path.join(module_dir, 'zero.sph')
    >>> shorten_sph = os.path.join(module_dir, 'shorten.sph')

    >>> with open(zero_sph, 'rb') as audio_file:
    ...   file_info, audio_info, wave = _get_sphere_fd(audio_file.fileno(), os.path.getsize(zero_sph), zero_sph, 'int16', os.path.abspath(zero_sph))

    >>> for key in sorted(file_info): print "%-24s  %r" % (key, file_info[key])
    file_item_bytes           2
    file_item_coding          'int16'
    file_sndfile_extension    'sph'
    file_sndfile_format       ''
    file_sndfile_type         'pcm SPH (NIST Sphere)'

    >>> for key in sorted(audio_info): print "%-24s  %r" % (key, audio_info[key])
    audio_num_channels        2
    audio_num_samples         128064
    audio_sample_rate         44100

    >>> print str(wave)
    [[  0  -1  -3 ..., 203 211 199]
     [  0   0  -3 ..., 225 225 221]]


    >>> with open(shorten_sph, 'rb') as audio_file:
    ...   file_info2, audio_info2, wave2 = _get_sphere_fd(audio_file.fileno(), os.path.getsize(shorten_sph), shorten_sph, 'int16', os.path.abspath(shorten_sph))

    >>> for key in sorted(file_info2): print "%-24s  %r" % (key, file_info2[key])
    file_item_bytes           2
    file_item_coding          'int16'
    file_sndfile_extension    'sph'
    file_sndfile_format       ''
    file_sndfile_type         'pcm,embedded-shorten-v2.00 SPH (NIST Sphere)'

    >>> for key in sorted(audio_info2): print "%-24s  %r" % (key, audio_info2[key])
    audio_num_channels        1
    audio_num_samples         37120
    audio_sample_rate         20000

    >>> print str(wave2)
    [[-1  1  1 ..., -4 -8 -5]]
    """

    assert wave_format is None or wave_format in wave_c_formats

    # As of 2009-04-20 see the following for NIST's underspecified
    # format description:
    #   http://ftp.cwi.nl/audio/NIST-SPHERE
    #   http://www.ldc.upenn.edu/Catalog/docs/LDC93S5/WAV_SPEC.TXT

    nist_1a = 'NIST_1A'
    # The first 128 bytes carry the magic string and the total header size.
    header1 = read_fd_strict(fileno, 128, filename_abs)
    if not header1.startswith(nist_1a):
        raise AudioTypeError(
            "did not find %r in header of purported NIST Sphere file %r" %
            (nist_1a, filename_abs))
    nist, header_size, rest = header1.split(None, 2)
    assert nist == nist_1a

    # Read whatever part of the declared header was not in the first chunk.
    header_size = int(header_size)
    rest += read_fd_strict(fileno, header_size - len(header1), filename_abs)

    # For now, we require the following fields:
    #   sample_count -i 128064
    #   sample_n_bytes -i 2
    #   channel_count -i 2
    #   sample_byte_format -s2 01
    #   sample_rate -i 44100
    #   sample_coding -s3 pcm
    info = builtin.attrdict()
    for line in cStringIO.StringIO(rest):
        parts = line.split()
        # Skip blank lines and comment lines.
        if not parts or parts[0][0] == ';':
            continue
        if parts[0] == 'end_head':
            break
        if len(parts) < 3:
            raise AudioTypeError(
                "expected at least three white-space-separated fields in NIST header line %r in file %r"
                % (line.strip(), filename_abs))
        # Split off exactly the name and the type; everything after them
        # (including any trailing "; comment") stays in field_value so the
        # three-way unpack cannot fail on lines with more than three tokens.
        field_name, field_type, field_value = line.split(None, 2)
        if field_type in ('-i', '-r'):
            # Drop a trailing comment; int()/float() tolerate the
            # surrounding whitespace that remains.
            field_value, _, _ = field_value.partition(';')
            convert = int if field_type == '-i' else float
            info[field_name] = convert(field_value)
        elif field_type.startswith('-s'):
            # here we do a stricter interpretation of the spec: the name,
            # type and value are taken to be separated by single spaces and
            # the value is exactly the declared number of characters
            prefix_len = len(field_name + ' ' + field_type + ' ')
            str_len = int(field_type[2:])
            info[field_name] = line[prefix_len:prefix_len + str_len]
        else:
            raise AudioTypeError(
                "unhandled field_type %r for field_name %r" %
                (field_type, field_name))

    missing = set(
        ('sample_count', 'sample_n_bytes', 'channel_count',
         'sample_byte_format', 'sample_rate', 'sample_coding')) - set(info)
    if missing:
        raise AudioTypeError(
            "missing required header fields (%s) in NIST Sphere file %r" %
            (', '.join(sorted(missing)), filename_abs))

    # this is a blunt check against bogus data
    if info.sample_n_bytes > 2:
        raise AudioTypeError(
            "unhandled sample_n_bytes, %d, in NIST Sphere file %r" %
            (info.sample_n_bytes, filename_abs))

    audio_info = builtin.attrdict((('audio_num_channels', info.channel_count),
                                   ('audio_num_samples', info.sample_count),
                                   ('audio_sample_rate', info.sample_rate)))
    check_positive_ints(audio_info, filename_abs)

    # PCM codings are reported as 'int<bits>'; any other coding is reported
    # by the lowercased first four characters of its name.
    if info.sample_coding.lower().startswith('pcm'):
        item_coding = 'int' + str(8 * info.sample_n_bytes)
    else:
        item_coding = info.sample_coding[:4].lower()
    file_info = builtin.attrdict((
        ('file_item_bytes', info.sample_n_bytes),
        ('file_item_coding', item_coding),
        ('file_sndfile_extension', 'sph'),
        ('file_sndfile_format', ''),
        ('file_sndfile_type', info.sample_coding + ' SPH (NIST Sphere)'),
    ))
    check_positive_ints(file_info, filename_abs)

    if wave_format is None:
        return file_info, audio_info, None

    # Let sph2pipe do the decoding; its raw output is parsed below as
    # interleaved 16-bit samples.
    args = 'sph2pipe', '-p', '-f', 'raw', filename_abs
    stdout, stderr, cmd = process.subprocess(args)
    if stderr:
        raise onyx.ExternalError(
            "unexpected stderr from command '%s': '%s'" %
            (cmd, stderr.strip()))
    num_items = audio_info.audio_num_channels * audio_info.audio_num_samples
    num_audio_bytes = num_items * numpy.int16().itemsize
    if len(stdout) != num_audio_bytes:
        raise onyx.DataFormatError(
            "expected %d bytes of audio data, got %d" %
            (num_audio_bytes, len(stdout)))
    wave = numpy.fromstring(stdout, dtype=numpy.int16)
    assert wave.shape == (num_items, )
    # reshape it, etc; then construct a new ndarray
    wave = wave.reshape((-1, audio_info.audio_num_channels)).transpose()
    wave = numpy.array(wave, dtype=wave_numpy_formats[wave_format])
    # do scaling the same way libsndfile does
    if wave_format == 'int32':
        wave <<= 16
    elif wave_format in ('float32', 'float64'):
        # Float literal so the factor is never truncated to 0 by Python 2
        # integer division.
        wave *= 1.0 / (1 << 15)
    assert wave.shape == (audio_info.audio_num_channels,
                          audio_info.audio_num_samples)

    return file_info, audio_info, wave
Exemplo n.º 4
0
def _get_sphere_fd(fileno, file_num_bytes, filename, wave_format, filename_abs):
    """
    Low-level reading of NIST Sphere audio data.

    The NIST_1A text header is read from the descriptor fileno via
    read_fd_strict and parsed into typed fields.  When wave_format is not
    None, the samples are decoded by running the external 'sph2pipe'
    command on filename_abs and converted to the requested format.

    Arguments:
      fileno          descriptor from which the header is read
      file_num_bytes  accepted but not used by this function
      filename        accepted but not used by this function
      wave_format     None to skip decoding, otherwise a key of
                      wave_c_formats / wave_numpy_formats
      filename_abs    path given to sph2pipe and used in error messages

    Returns a (file_info, audio_info, wave) triple.  file_info and
    audio_info are attrdicts; wave is a numpy array of shape
    (num_channels, num_samples), or None when wave_format is None.

    Raises AudioTypeError if the header lacks the NIST_1A marker, contains
    a malformed line or an unhandled field type, is missing a required
    field, or declares sample_n_bytes > 2.  Raises onyx.ExternalError if
    sph2pipe writes anything to stderr, and onyx.DataFormatError if it
    returns an unexpected number of bytes.

    >>> module_dir, module_name = os.path.split(__file__)
    >>> zero_sph = os.path.join(module_dir, 'zero.sph')
    >>> shorten_sph = os.path.join(module_dir, 'shorten.sph')

    >>> with open(zero_sph, 'rb') as audio_file:
    ...   file_info, audio_info, wave = _get_sphere_fd(audio_file.fileno(), os.path.getsize(zero_sph), zero_sph, 'int16', os.path.abspath(zero_sph))

    >>> for key in sorted(file_info): print "%-24s  %r" % (key, file_info[key])
    file_item_bytes           2
    file_item_coding          'int16'
    file_sndfile_extension    'sph'
    file_sndfile_format       ''
    file_sndfile_type         'pcm SPH (NIST Sphere)'

    >>> for key in sorted(audio_info): print "%-24s  %r" % (key, audio_info[key])
    audio_num_channels        2
    audio_num_samples         128064
    audio_sample_rate         44100

    >>> print str(wave)
    [[  0  -1  -3 ..., 203 211 199]
     [  0   0  -3 ..., 225 225 221]]


    >>> with open(shorten_sph, 'rb') as audio_file:
    ...   file_info2, audio_info2, wave2 = _get_sphere_fd(audio_file.fileno(), os.path.getsize(shorten_sph), shorten_sph, 'int16', os.path.abspath(shorten_sph))

    >>> for key in sorted(file_info2): print "%-24s  %r" % (key, file_info2[key])
    file_item_bytes           2
    file_item_coding          'int16'
    file_sndfile_extension    'sph'
    file_sndfile_format       ''
    file_sndfile_type         'pcm,embedded-shorten-v2.00 SPH (NIST Sphere)'

    >>> for key in sorted(audio_info2): print "%-24s  %r" % (key, audio_info2[key])
    audio_num_channels        1
    audio_num_samples         37120
    audio_sample_rate         20000

    >>> print str(wave2)
    [[-1  1  1 ..., -4 -8 -5]]
    """

    assert wave_format is None or wave_format in wave_c_formats

    # As of 2009-04-20 see the following for NIST's underspecified
    # format description:
    #   http://ftp.cwi.nl/audio/NIST-SPHERE
    #   http://www.ldc.upenn.edu/Catalog/docs/LDC93S5/WAV_SPEC.TXT

    nist_1a = 'NIST_1A'
    # The first 128 bytes carry the magic string and the total header size.
    header1 = read_fd_strict(fileno, 128, filename_abs)
    if not header1.startswith(nist_1a):
        raise AudioTypeError("did not find %r in header of purported NIST Sphere file %r" % (nist_1a, filename_abs))
    nist, header_size, rest = header1.split(None, 2)
    assert nist == nist_1a

    # Read whatever part of the declared header was not in the first chunk.
    header_size = int(header_size)
    rest += read_fd_strict(fileno, header_size - len(header1), filename_abs)

    # For now, we require the following fields:
    #   sample_count -i 128064
    #   sample_n_bytes -i 2
    #   channel_count -i 2
    #   sample_byte_format -s2 01
    #   sample_rate -i 44100
    #   sample_coding -s3 pcm
    info = builtin.attrdict()
    for line in cStringIO.StringIO(rest):
        parts = line.split()
        # Skip blank lines and comment lines.
        if not parts or parts[0][0] == ';':
            continue
        if parts[0] == 'end_head':
            break
        if len(parts) < 3:
            raise AudioTypeError("expected at least three white-space-separated fields in NIST header line %r in file %r" % (line.strip(), filename_abs))
        # Split off exactly the name and the type; everything after them
        # (including any trailing "; comment") stays in field_value so the
        # three-way unpack cannot fail on lines with more than three tokens.
        field_name, field_type, field_value = line.split(None, 2)
        if field_type in ('-i', '-r'):
            # Drop a trailing comment; int()/float() tolerate the
            # surrounding whitespace that remains.
            field_value, _, _ = field_value.partition(';')
            convert = int if field_type == '-i' else float
            info[field_name] = convert(field_value)
        elif field_type.startswith('-s'):
            # here we do a stricter interpretation of the spec: the name,
            # type and value are taken to be separated by single spaces and
            # the value is exactly the declared number of characters
            prefix_len = len(field_name + ' ' + field_type + ' ')
            str_len = int(field_type[2:])
            info[field_name] = line[prefix_len:prefix_len+str_len]
        else:
            raise AudioTypeError("unhandled field_type %r for field_name %r" % (field_type, field_name))

    missing = set(('sample_count', 'sample_n_bytes', 'channel_count', 'sample_byte_format', 'sample_rate', 'sample_coding')) - set(info)
    if missing:
        raise AudioTypeError("missing required header fields (%s) in NIST Sphere file %r" % (', '.join(sorted(missing)), filename_abs))

    # this is a blunt check against bogus data
    if info.sample_n_bytes > 2:
        raise AudioTypeError("unhandled sample_n_bytes, %d, in NIST Sphere file %r" % (info.sample_n_bytes, filename_abs))

    audio_info = builtin.attrdict((('audio_num_channels', info.channel_count),
                                   ('audio_num_samples', info.sample_count),
                                   ('audio_sample_rate', info.sample_rate)))
    check_positive_ints(audio_info, filename_abs)

    # PCM codings are reported as 'int<bits>'; any other coding is reported
    # by the lowercased first four characters of its name.
    if info.sample_coding.lower().startswith('pcm'):
        item_coding = 'int' + str(8*info.sample_n_bytes)
    else:
        item_coding = info.sample_coding[:4].lower()
    file_info = builtin.attrdict((('file_item_bytes', info.sample_n_bytes),
                                  ('file_item_coding', item_coding),
                                  ('file_sndfile_extension', 'sph'),
                                  ('file_sndfile_format', ''),
                                  ('file_sndfile_type', info.sample_coding + ' SPH (NIST Sphere)'),
                                  ))
    check_positive_ints(file_info, filename_abs)

    if wave_format is None:
        return file_info, audio_info, None

    # Let sph2pipe do the decoding; its raw output is parsed below as
    # interleaved 16-bit samples.
    args = 'sph2pipe', '-p', '-f', 'raw', filename_abs
    stdout, stderr, cmd = process.subprocess(args)
    if stderr:
        raise onyx.ExternalError("unexpected stderr from command '%s': '%s'" % (cmd, stderr.strip()))
    num_items = audio_info.audio_num_channels * audio_info.audio_num_samples
    num_audio_bytes = num_items * numpy.int16().itemsize
    if len(stdout) != num_audio_bytes:
        raise onyx.DataFormatError("expected %d bytes of audio data, got %d" % (num_audio_bytes, len(stdout)))
    wave = numpy.fromstring(stdout, dtype=numpy.int16)
    assert wave.shape == (num_items,)
    # reshape it, etc; then construct a new ndarray
    wave = wave.reshape((-1, audio_info.audio_num_channels)).transpose()
    wave = numpy.array(wave, dtype=wave_numpy_formats[wave_format])
    # do scaling the same way libsndfile does
    if wave_format == 'int32':
        wave <<= 16
    elif wave_format in ('float32', 'float64'):
        # Float literal so the factor is never truncated to 0 by Python 2
        # integer division.
        wave *= 1.0 / (1 << 15)
    assert wave.shape == (audio_info.audio_num_channels, audio_info.audio_num_samples)

    return file_info, audio_info, wave