Exemplo n.º 1
0
def run(args):
    """Write a 2 x 3 transform matrix built from class priors to stdout.

    The three priors (silence, speech, garbage) default to equal weights and
    are replaced by the first row of the matrix read from ``args.priors``
    when that option is given.  They are normalized to sum to one.  Row 0 of
    the output is built from ``sil_scale`` and the ``*_in_sil_weight``
    options, row 1 from the ``*_in_speech_weight`` options (with a fixed
    1.0 for speech); every entry is divided by the normalized prior of its
    column.

    Args:
        args: Parsed options; reads ``priors``, ``sil_scale``,
            ``speech_in_sil_weight``, ``garbage_in_sil_weight``,
            ``sil_in_speech_weight`` and ``garbage_in_speech_weight``.

    Raises:
        RuntimeError: If the priors matrix is empty or its first row does
            not have exactly three entries.
    """
    priors = [[1.0, 1.0, 1.0]]
    if args.priors is not None:
        priors = common_lib.read_matrix_ascii(args.priors)
        # An empty matrix must be rejected too; otherwise the priors[0]
        # lookups below fail with an opaque IndexError.
        if len(priors) == 0 or len(priors[0]) != 3:
            raise RuntimeError("Invalid dimension for priors {0}"
                               "".format(priors))

    # Normalize so that the three priors sum to one.
    priors_sum = sum(priors[0])
    sil_prior = old_div(priors[0][0], priors_sum)
    speech_prior = old_div(priors[0][1], priors_sum)
    garbage_prior = old_div(priors[0][2], priors_sum)

    transform_mat = [[
        old_div(args.sil_scale, sil_prior),
        old_div(args.speech_in_sil_weight, speech_prior),
        old_div(args.garbage_in_sil_weight, garbage_prior)
    ],
                     [
                         old_div(args.sil_in_speech_weight, sil_prior),
                         old_div(1.0, speech_prior),
                         old_div(args.garbage_in_speech_weight, garbage_prior)
                     ]]

    common_lib.write_matrix_ascii(sys.stdout, transform_mat)
Exemplo n.º 2
0
def run(args):
    """Write a 2 x 3 transform matrix built from class priors to stdout.

    The three priors (silence, speech, garbage) default to equal weights and
    are replaced by the first row of the matrix read from ``args.priors``
    when that option is given.  They are normalized to sum to one.  Row 0 of
    the output is built from ``sil_scale`` and the ``*_in_sil_weight``
    options, row 1 from the ``*_in_speech_weight`` options (with a fixed
    1.0 for speech); every entry is divided by the normalized prior of its
    column.

    Args:
        args: Parsed options; reads ``priors``, ``sil_scale``,
            ``speech_in_sil_weight``, ``garbage_in_sil_weight``,
            ``sil_in_speech_weight`` and ``garbage_in_speech_weight``.

    Raises:
        RuntimeError: If the priors matrix is empty or its first row does
            not have exactly three entries.
    """
    priors = [[1.0, 1.0, 1.0]]
    if args.priors is not None:
        priors = common_lib.read_matrix_ascii(args.priors)
        # An empty matrix must be rejected too; otherwise the priors[0]
        # lookups below fail with an opaque IndexError.
        if len(priors) == 0 or len(priors[0]) != 3:
            raise RuntimeError("Invalid dimension for priors {0}"
                               "".format(priors))

    # Normalize so that the three priors sum to one.
    priors_sum = sum(priors[0])
    sil_prior = priors[0][0] / priors_sum
    speech_prior = priors[0][1] / priors_sum
    garbage_prior = priors[0][2] / priors_sum

    transform_mat = [
        [
            args.sil_scale / sil_prior,
            args.speech_in_sil_weight / speech_prior,
            args.garbage_in_sil_weight / garbage_prior,
        ],
        [
            args.sil_in_speech_weight / sil_prior,
            1.0 / speech_prior,
            args.garbage_in_speech_weight / garbage_prior,
        ],
    ]

    common_lib.write_matrix_ascii(sys.stdout, transform_mat)
Exemplo n.º 3
0
def run(args):
    """Write a 3 x 3 diagonal posterior-to-likelihood transform to stdout.

    The priors are read from ``args.priors`` and rescaled so that the
    non-speech and speech priors sum to one.  The output matrix has the
    inverse of each rescaled prior on its diagonal, except that the garbage
    entry is set to zero and the speech entry is additionally multiplied by
    ``args.speech_likelihood_weight``.

    Args:
        args: Parsed options; reads ``priors`` and
            ``speech_likelihood_weight``.

    Raises:
        RuntimeError: If the priors matrix is not a single row of exactly
            three values.
    """
    # Load priors.
    # - priors[0]  --  prior probability of non-speech
    # - priors[1]  --  prior probability of speech
    # - priors[2]  --  prior probability of garbage; ignored
    priors = common_lib.read_matrix_ascii(args.priors)
    # Require exactly one row of three values: an empty matrix would fail on
    # priors[0], and extra rows would make np.diag() below extract a diagonal
    # instead of building a 3 x 3 matrix.
    if len(priors) != 1 or len(priors[0]) != 3:
        raise RuntimeError(f'Invalid dimension for priors {priors}')
    priors = np.squeeze(np.array(priors, dtype=np.float64))

    # Create matrix that converts posteriors to likelihoods by dividing by
    # normalized priors.
    pmass = priors[0] + priors[1]  # Total mass devoted to speech/non-speech.
    priors /= pmass
    transform_mat = np.diag(1 / priors)
    transform_mat[2, 2] = 0.0  # Ignore garbage entirely
    transform_mat[1, 1] *= args.speech_likelihood_weight
    common_lib.write_matrix_ascii(sys.stdout, transform_mat)
Exemplo n.º 4
0
def run(args):
    """Write a 2 x 3 transform matrix built from class priors to stdout.

    The three priors (silence, speech, garbage) default to equal weights and
    are replaced by the first row of the matrix read from ``args.priors``
    when that option is given.  They are normalized to sum to one.  Row 0 of
    the output is built from ``sil_scale`` and the ``*_in_sil_weight``
    options, row 1 from the ``*_in_speech_weight`` options (with a fixed
    1.0 for speech); every entry is divided by the normalized prior of its
    column.

    Args:
        args: Parsed options; reads ``priors``, ``sil_scale``,
            ``speech_in_sil_weight``, ``garbage_in_sil_weight``,
            ``sil_in_speech_weight`` and ``garbage_in_speech_weight``.

    Raises:
        RuntimeError: If the priors matrix is empty or its first row does
            not have exactly three entries.
    """
    priors = [[1.0, 1.0, 1.0]]
    if args.priors is not None:
        priors = common_lib.read_matrix_ascii(args.priors)
        # An empty matrix must be rejected too; otherwise the priors[0]
        # lookups below fail with an opaque IndexError.
        if len(priors) == 0 or len(priors[0]) != 3:
            raise RuntimeError("Invalid dimension for priors {0}"
                               "".format(priors))

    # Normalize so that the three priors sum to one.
    priors_sum = sum(priors[0])
    sil_prior = priors[0][0] / priors_sum
    speech_prior = priors[0][1] / priors_sum
    garbage_prior = priors[0][2] / priors_sum

    transform_mat = [[args.sil_scale / sil_prior,
                      args.speech_in_sil_weight / speech_prior,
                      args.garbage_in_sil_weight / garbage_prior],
                     [args.sil_in_speech_weight / sil_prior,
                      1.0 / speech_prior,
                      args.garbage_in_speech_weight / garbage_prior]]

    common_lib.write_matrix_ascii(sys.stdout, transform_mat)
def run(args):
    """Write per-recording matrices of default out-of-segment targets.

    For every recording listed in ``args.reco2utt`` a matrix with one row per
    frame is filled with the default target row (``args.default_targets`` if
    given, otherwise ``[1, 0, 0]``).  Rows covered by any of the recording's
    segments are then set to zero, and the matrix is written to
    ``args.out_targets_ark`` keyed by the recording id.

    Args:
        args: Parsed options; reads ``reco2utt``, ``reco2num_frames``,
            ``segments``, ``default_targets``, ``frame_shift`` and
            ``out_targets_ark``.

    Raises:
        ValueError: If a line of one of the input files cannot be parsed.
        KeyError: If a recording in reco2utt has no reco2num_frames entry.
        RuntimeError: If no utterance or no recording was processed, or if
            more than ``num_utt // 2`` utterances failed.
    """
    reco2utt = {}
    with common_lib.smart_open(args.reco2utt) as f:
        for line in f:
            parts = line.strip().split()
            if len(parts) < 2:
                raise ValueError("Could not parse line {0}".format(line))
            reco2utt[parts[0]] = parts[1:]

    reco2num_frames = {}
    with common_lib.smart_open(args.reco2num_frames) as f:
        for line in f:
            parts = line.strip().split()
            if len(parts) != 2:
                raise ValueError("Could not parse line {0}".format(line))
            if parts[0] not in reco2utt:
                continue
            reco2num_frames[parts[0]] = int(parts[1])

    segments = {}
    with common_lib.smart_open(args.segments) as f:
        for line in f:
            parts = line.strip().split()
            if len(parts) not in [4, 5]:
                raise ValueError("Could not parse line {0}".format(line))
            utt = parts[0]
            reco = parts[1]
            if reco not in reco2utt:
                continue
            start_time = float(parts[2])
            end_time = float(parts[3])
            segments[utt] = [reco, start_time, end_time]

    num_utt_err = 0
    num_utt = 0
    num_reco = 0

    if args.default_targets is not None:
        default_targets = np.matrix(
            common_lib.read_matrix_ascii(args.default_targets))
    else:
        default_targets = np.matrix([[1, 0, 0]])
    assert (np.shape(default_targets)[0] == 1
            and np.shape(default_targets)[1] == 3)

    with common_lib.smart_open(args.out_targets_ark, 'w') as f:
        # .items() works on both Python 2 and 3; .iteritems() is Python 2 only.
        for reco, utts in reco2utt.items():
            reco_mat = np.repeat(default_targets,
                                 reco2num_frames[reco],
                                 axis=0)
            # Sort on start time.  Utterances without a segments entry are
            # placed last instead of raising KeyError here, so that the
            # guard below can count them as errors.
            utts.sort(key=lambda x: (segments[x][1]
                                     if x in segments else float("inf")))
            for utt in utts:
                if utt not in segments:
                    num_utt_err += 1
                    continue
                segment = segments[utt]

                start_frame = int(segment[1] / args.frame_shift)
                end_frame = int(segment[2] / args.frame_shift)

                # Clip segments that run past the end of the recording.
                if end_frame > reco2num_frames[reco]:
                    end_frame = reco2num_frames[reco]
                num_frames = end_frame - start_frame

                if num_frames <= 0:
                    # A negative size would make np.zeros() raise; skip the
                    # segment and count it as a failure instead.
                    logger.warning("Skipping utterance {utt}: frame range "
                                   "[{start}, {end}) is empty or lies "
                                   "outside the recording"
                                   "".format(utt=utt, start=start_frame,
                                             end=end_frame))
                    num_utt_err += 1
                    continue

                reco_mat[start_frame:end_frame] = np.zeros([num_frames, 3])
                num_utt += 1

            if reco_mat.shape[0] > 0:
                common_lib.write_matrix_ascii(f, reco_mat.tolist(), key=reco)
                num_reco += 1

    logger.info("Got default out-of-segment targets for {num_reco} recordings "
                "containing {num_utt} in-segment regions; "
                "failed to account {num_utt_err} utterances"
                "".format(num_reco=num_reco,
                          num_utt=num_utt,
                          num_utt_err=num_utt_err))

    if num_utt == 0 or num_utt_err > num_utt // 2 or num_reco == 0:
        raise RuntimeError(
            "Too many failures: {num_utt} utterances accounted, "
            "{num_utt_err} failed, {num_reco} recordings written"
            "".format(num_utt=num_utt, num_utt_err=num_utt_err,
                      num_reco=num_reco))
Exemplo n.º 6
0
def run(args):
    """Merge per-utterance target matrices into one matrix per recording.

    For every recording a matrix with one row per frame is initialised with
    the default target row (``args.default_targets`` if given, otherwise
    zeros).  Each utterance's targets are dumped as text with ``copy-feats``
    and copied into the rows covered by its segment.  Where a segment
    overlaps rows already written by an earlier segment, the two sets of
    targets are linearly interpolated across the overlap.  The merged matrix
    is written to ``args.out_targets_ark`` keyed by the recording id.

    Args:
        args: Parsed options; reads ``reco2utt``, ``reco2num_frames``,
            ``segments``, ``targets_scp``, ``default_targets``,
            ``frame_shift``, ``length_tolerance`` and ``out_targets_ark``.

    Raises:
        ValueError: If a segment has a non-positive length in frames.
        RuntimeError: If ``copy-feats`` exits with a non-zero status, if no
            utterance or no recording was merged, or if more than
            ``num_utt // 2`` utterances failed.
    """
    reco2utt = read_reco2utt_file(args.reco2utt)
    reco2num_frames = read_reco2num_frames_file(args.reco2num_frames)
    segments = read_segments_file(args.segments, reco2utt)
    targets = read_targets_scp(args.targets_scp, segments)

    if args.default_targets is not None:
        # Read the vector of default targets for out-of-segment regions
        default_targets = np.matrix(
            common_lib.read_matrix_ascii(args.default_targets))
    else:
        default_targets = np.zeros([1, 3])
    assert np.shape(default_targets)[0] == 1 and np.shape(
        default_targets)[1] == 3

    num_utt_err = 0
    num_utt = 0
    num_reco = 0

    with common_lib.smart_open(args.out_targets_ark, "w") as fh:
        for reco, utts in reco2utt.items():
            # Read a recording and the list of its utterances from the
            # reco2utt dictionary
            reco_mat = np.repeat(default_targets,
                                 reco2num_frames[reco],
                                 axis=0)
            # Sort on start time.  Utterances without a segments entry are
            # placed last instead of raising KeyError here, so that the
            # guard below can count them as errors.
            utts.sort(key=lambda x: (segments[x][1]
                                     if x in segments else float("inf")))

            # One past the last frame written by an accepted segment.
            end_frame_accounted = 0

            for utt in utts:
                if utt not in segments or utt not in targets:
                    num_utt_err += 1
                    continue
                segment = segments[utt]

                # Read the targets corresponding to the segments
                cmd = "copy-feats --binary=false {mat_fn} -" "".format(
                    mat_fn=targets[utt])
                p = subprocess.Popen(cmd,
                                     shell=True,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE)

                try:
                    mat = np.matrix(common_lib.read_matrix_ascii(p.stdout),
                                    dtype="float32")
                except Exception:
                    logger.error("Command '{cmd}' failed".format(cmd=cmd))
                    raise
                finally:
                    # Always reap the child process and check its status.
                    [stdout, stderr] = p.communicate()
                    if p.returncode is not None and p.returncode != 0:
                        raise RuntimeError(
                            'Command "{cmd}" failed with status {status}; '
                            "stderr = {stderr}".format(cmd=cmd,
                                                       status=-p.returncode,
                                                       stderr=stderr))

                # Convert segment times to frame indices, rounding to the
                # nearest frame.
                start_frame = int(segment[1] / args.frame_shift + 0.5)
                end_frame = int(segment[2] / args.frame_shift + 0.5)
                num_frames = end_frame - start_frame

                if num_frames <= 0:
                    raise ValueError("Invalid line in segments file {0}"
                                     "".format(segment))

                if abs(mat.shape[0] - num_frames) > args.length_tolerance:
                    logger.warning("For utterance {utt}, mismatch in segment "
                                   "length and targets matrix size; "
                                   "{s_len} vs {t_len}".format(
                                       utt=utt,
                                       s_len=num_frames,
                                       t_len=mat.shape[0]))
                    num_utt_err += 1
                    continue

                # Fix end_frame and num_frames if the segment goes beyond
                # the length of the recording.
                if end_frame > reco2num_frames[reco]:
                    end_frame = reco2num_frames[reco]
                    num_frames = end_frame - start_frame

                # Fix "num_frames" and "end_frame" if "num_frames" is lower
                # than the size of the targets matrix "mat"
                num_frames = min(num_frames, mat.shape[0])
                end_frame = start_frame + num_frames

                if num_frames <= 0:
                    logger.warning("For utterance {utt}, start-frame {start} "
                                   "is outside the recording"
                                   "".format(utt=utt, start=start_frame))
                    num_utt_err += 1
                    continue

                if end_frame < end_frame_accounted:
                    logger.warning("For utterance {utt}, end-frame {end} "
                                   "is before the end of a previous segment. "
                                   "i.e. this segment is completely within "
                                   "another segment. Ignoring this segment."
                                   "".format(utt=utt, end=end_frame))
                    num_utt_err += 1
                    continue

                if start_frame < end_frame_accounted:
                    # Segment overlaps with a previous utterance
                    # Combine targets using a weighted interpolation using a
                    # triangular window with a weight of 1 at the start/end of
                    # overlap and 0 at the end/start of the segment
                    for n in range(0, end_frame_accounted - start_frame):
                        w = float(n) / float(end_frame_accounted - start_frame)
                        reco_mat[n + start_frame, :] = (
                            reco_mat[n + start_frame, :] * (1.0 - w) +
                            mat[n, :] * w)

                    # Rows past the overlap come straight from this segment.
                    if end_frame > end_frame_accounted:
                        reco_mat[end_frame_accounted:end_frame, :] = mat[(
                            end_frame_accounted -
                            start_frame):(end_frame - start_frame), :, ]
                else:
                    # No overlap with the previous utterances.
                    # So just add it to the output.
                    reco_mat[start_frame:end_frame, :] = mat[0:num_frames, :]
                logger.debug(
                    "reco_mat shape = %s, mat shape = %s, "
                    "start_frame = %d, end_frame = %d",
                    reco_mat.shape,
                    mat.shape,
                    start_frame,
                    end_frame,
                )

                end_frame_accounted = end_frame
                num_utt += 1

            if reco_mat.shape[0] > 0:
                common_lib.write_matrix_ascii(fh, reco_mat, key=reco)
                num_reco += 1

    logger.info("Merged {num_utt} segment targets from {num_reco} recordings; "
                "failed with {num_utt_err} utterances"
                "".format(num_utt=num_utt,
                          num_reco=num_reco,
                          num_utt_err=num_utt_err))

    if num_utt == 0 or num_utt_err > num_utt // 2 or num_reco == 0:
        raise RuntimeError(
            "Too many failures: {num_utt} utterances merged, "
            "{num_utt_err} failed, {num_reco} recordings written"
            "".format(num_utt=num_utt, num_utt_err=num_utt_err,
                      num_reco=num_reco))
def run(args):
    """Merge per-utterance target matrices into one matrix per recording.

    For every recording a matrix with one row per frame is initialised with
    the default target row (``args.default_targets`` if given, otherwise
    zeros).  Each utterance's targets are dumped as text with ``copy-feats``
    and copied into the rows covered by its segment.  Where a segment
    overlaps rows already written by an earlier segment, the two sets of
    targets are linearly interpolated across the overlap.  The merged matrix
    is written to ``args.out_targets_ark`` keyed by the recording id.

    Args:
        args: Parsed options; reads ``reco2utt``, ``reco2num_frames``,
            ``segments``, ``targets_scp``, ``default_targets``,
            ``frame_shift``, ``length_tolerance`` and ``out_targets_ark``.

    Raises:
        ValueError: If a segment has a non-positive length in frames.
        RuntimeError: If ``copy-feats`` exits with a non-zero status, if no
            utterance or no recording was merged, or if more than half of
            the utterances failed.
    """
    reco2utt = read_reco2utt_file(args.reco2utt)
    reco2num_frames = read_reco2num_frames_file(args.reco2num_frames)
    segments = read_segments_file(args.segments, reco2utt)
    targets = read_targets_scp(args.targets_scp, segments)

    if args.default_targets is not None:
        # Read the vector of default targets for out-of-segment regions
        default_targets = np.matrix(
            common_lib.read_matrix_ascii(args.default_targets))
    else:
        default_targets = np.zeros([1, 3])
    assert (np.shape(default_targets)[0] == 1
            and np.shape(default_targets)[1] == 3)

    num_utt_err = 0
    num_utt = 0
    num_reco = 0

    with common_lib.smart_open(args.out_targets_ark, 'w') as fh:
        # .items() works on both Python 2 and 3; .iteritems() is Python 2 only.
        for reco, utts in reco2utt.items():
            # Read a recording and the list of its utterances from the
            # reco2utt dictionary
            reco_mat = np.repeat(default_targets, reco2num_frames[reco],
                                 axis=0)
            # Sort on start time.  Utterances without a segments entry are
            # placed last instead of raising KeyError here, so that the
            # guard below can count them as errors.
            utts.sort(key=lambda x: (segments[x][1]
                                     if x in segments else float("inf")))

            # One past the last frame written by an accepted segment.  The
            # overlap is measured against this rather than the nominal end
            # of the preceding utterance, which may have been skipped or
            # may not be the segment reaching furthest.
            end_frame_accounted = 0

            for utt in utts:
                if utt not in segments or utt not in targets:
                    num_utt_err += 1
                    continue
                segment = segments[utt]

                # Read the targets corresponding to the segments
                cmd = ("copy-feats --binary=false {mat_fn} -"
                       "".format(mat_fn=targets[utt]))
                p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE)

                try:
                    mat = np.matrix(common_lib.read_matrix_ascii(p.stdout),
                                    dtype='float32')
                except Exception:
                    logger.error("Command '{cmd}' failed".format(cmd=cmd))
                    raise
                finally:
                    # Always reap the child process and check its status.
                    [stdout, stderr] = p.communicate()
                    if p.returncode is not None and p.returncode != 0:
                        raise RuntimeError(
                            'Command "{cmd}" failed with status {status}; '
                            'stderr = {stderr}'.format(cmd=cmd, status=-p.returncode,
                                                       stderr=stderr))

                # Convert segment times to frame indices, rounding to the
                # nearest frame.
                start_frame = int(segment[1] / args.frame_shift + 0.5)
                end_frame = int(segment[2] / args.frame_shift + 0.5)
                num_frames = end_frame - start_frame

                if num_frames <= 0:
                    raise ValueError("Invalid line in segments file {0}"
                                     "".format(segment))

                if abs(mat.shape[0] - num_frames) > args.length_tolerance:
                    logger.warning("For utterance {utt}, mismatch in segment "
                                   "length and targets matrix size; "
                                   "{s_len} vs {t_len}".format(
                                       utt=utt, s_len=num_frames,
                                       t_len=mat.shape[0]))
                    num_utt_err += 1
                    continue

                # Clip the segment to the length of the recording.
                if end_frame > reco2num_frames[reco]:
                    end_frame = reco2num_frames[reco]
                    num_frames = end_frame - start_frame

                # Never read more rows than the targets matrix provides.
                num_frames = min(num_frames, mat.shape[0])
                end_frame = start_frame + num_frames

                if num_frames <= 0:
                    logger.warning("For utterance {utt}, start-frame {start} "
                                   "is outside the recording"
                                   "".format(utt=utt, start=start_frame))
                    num_utt_err += 1
                    continue

                if end_frame < end_frame_accounted:
                    # Interpolating here would index past the end of "mat".
                    logger.warning("For utterance {utt}, end-frame {end} "
                                   "is before the end of a previous segment. "
                                   "i.e. this segment is completely within "
                                   "another segment. Ignoring this segment."
                                   "".format(utt=utt, end=end_frame))
                    num_utt_err += 1
                    continue

                if start_frame < end_frame_accounted:
                    # Segment overlaps with a previous utterance
                    # Combine targets using a weighted interpolation using a
                    # triangular window with a weight of 1 at the start/end of
                    # overlap and 0 at the end/start of the segment
                    for n in range(0, end_frame_accounted - start_frame):
                        w = float(n) / float(end_frame_accounted - start_frame)
                        reco_mat[n + start_frame, :] = (
                            reco_mat[n + start_frame, :] * (1.0 - w)
                            + mat[n, :] * w)

                    # Rows past the overlap come straight from this segment.
                    if end_frame > end_frame_accounted:
                        reco_mat[end_frame_accounted:end_frame, :] = (
                            mat[(end_frame_accounted - start_frame):
                                (end_frame - start_frame), :])
                else:
                    # No overlap with the previous utterances.
                    # So just add it to the output.
                    reco_mat[start_frame:end_frame, :] = mat[0:num_frames, :]
                logger.debug("reco_mat shape = %s, mat shape = %s, "
                             "start_frame = %d, end_frame = %d", reco_mat.shape,
                             mat.shape, start_frame, end_frame)

                end_frame_accounted = end_frame
                num_utt += 1

            if reco_mat.shape[0] > 0:
                common_lib.write_matrix_ascii(fh, reco_mat,
                                              key=reco)
                num_reco += 1

    logger.info("Merged {num_utt} segment targets from {num_reco} recordings; "
                "failed with {num_utt_err} utterances"
                "".format(num_utt=num_utt, num_reco=num_reco,
                          num_utt_err=num_utt_err))

    if num_utt == 0 or num_utt_err > num_utt / 2 or num_reco == 0:
        raise RuntimeError(
            "Too many failures: {num_utt} utterances merged, "
            "{num_utt_err} failed, {num_reco} recordings written"
            "".format(num_utt=num_utt, num_utt_err=num_utt_err,
                      num_reco=num_reco))
def run(args):
    """Write per-recording matrices of default out-of-segment targets.

    For every recording listed in ``args.reco2utt`` a matrix with one row per
    frame is filled with the default target row (``args.default_targets`` if
    given, otherwise ``[1, 0, 0]``).  Rows covered by any of the recording's
    segments are then set to zero, and the matrix is written to
    ``args.out_targets_ark`` keyed by the recording id.

    Args:
        args: Parsed options; reads ``reco2utt``, ``reco2num_frames``,
            ``segments``, ``default_targets``, ``frame_shift`` and
            ``out_targets_ark``.

    Raises:
        ValueError: If a line of one of the input files cannot be parsed.
        KeyError: If a recording in reco2utt has no reco2num_frames entry.
        RuntimeError: If no utterance or no recording was processed, or if
            more than half of the utterances failed.
    """
    reco2utt = {}
    with common_lib.smart_open(args.reco2utt) as f:
        for line in f:
            parts = line.strip().split()
            if len(parts) < 2:
                raise ValueError("Could not parse line {0}".format(line))
            reco2utt[parts[0]] = parts[1:]

    reco2num_frames = {}
    with common_lib.smart_open(args.reco2num_frames) as f:
        for line in f:
            parts = line.strip().split()
            if len(parts) != 2:
                raise ValueError("Could not parse line {0}".format(line))
            if parts[0] not in reco2utt:
                continue
            reco2num_frames[parts[0]] = int(parts[1])

    segments = {}
    with common_lib.smart_open(args.segments) as f:
        for line in f:
            parts = line.strip().split()
            if len(parts) not in [4, 5]:
                raise ValueError("Could not parse line {0}".format(line))
            utt = parts[0]
            reco = parts[1]
            if reco not in reco2utt:
                continue
            start_time = float(parts[2])
            end_time = float(parts[3])
            segments[utt] = [reco, start_time, end_time]

    num_utt_err = 0
    num_utt = 0
    num_reco = 0

    if args.default_targets is not None:
        default_targets = np.matrix(common_lib.read_matrix_ascii(args.default_targets))
    else:
        default_targets = np.matrix([[1, 0, 0]])
    assert (np.shape(default_targets)[0] == 1
            and np.shape(default_targets)[1] == 3)

    with common_lib.smart_open(args.out_targets_ark, 'w') as f:
        # .items() works on both Python 2 and 3; .iteritems() is Python 2 only.
        for reco, utts in reco2utt.items():
            reco_mat = np.repeat(default_targets, reco2num_frames[reco],
                                 axis=0)
            # Sort on start time.  Utterances without a segments entry are
            # placed last instead of raising KeyError here, so that the
            # guard below can count them as errors.
            utts.sort(key=lambda x: (segments[x][1]
                                     if x in segments else float("inf")))
            for utt in utts:
                if utt not in segments:
                    num_utt_err += 1
                    continue
                segment = segments[utt]

                start_frame = int(segment[1] / args.frame_shift)
                end_frame = int(segment[2] / args.frame_shift)

                # Clip segments that run past the end of the recording.
                if end_frame > reco2num_frames[reco]:
                    end_frame = reco2num_frames[reco]
                num_frames = end_frame - start_frame

                if num_frames <= 0:
                    # A negative size would make np.zeros() raise; skip the
                    # segment and count it as a failure instead.
                    logger.warning("Skipping utterance {utt}: frame range "
                                   "[{start}, {end}) is empty or lies "
                                   "outside the recording"
                                   "".format(utt=utt, start=start_frame,
                                             end=end_frame))
                    num_utt_err += 1
                    continue

                reco_mat[start_frame:end_frame] = np.zeros([num_frames, 3])
                num_utt += 1

            if reco_mat.shape[0] > 0:
                common_lib.write_matrix_ascii(f, reco_mat.tolist(),
                                              key=reco)
                num_reco += 1

    logger.info("Got default out-of-segment targets for {num_reco} recordings "
                "containing {num_utt} in-segment regions; "
                "failed to account {num_utt_err} utterances"
                "".format(num_reco=num_reco, num_utt=num_utt,
                          num_utt_err=num_utt_err))

    if num_utt == 0 or num_utt_err > num_utt / 2 or num_reco == 0:
        raise RuntimeError(
            "Too many failures: {num_utt} utterances accounted, "
            "{num_utt_err} failed, {num_reco} recordings written"
            "".format(num_utt=num_utt, num_utt_err=num_utt_err,
                      num_reco=num_reco))