Example #1
def process_in_each_thread(line, name, apply_cmvn):
    if name != 'test':
        utt_id, inputs_path, labels_path = line.strip().split()
    else:
        utt_id, inputs_path = line.strip().split()
    tfrecords_name = os.path.join(FLAGS.output_dir, name,
                                  utt_id + ".tfrecords")
    with tf.python_io.TFRecordWriter(tfrecords_name) as writer:
        tf.logging.info(
            "Writing utterance %s to %s" % (utt_id, tfrecords_name))
        inputs = read_binary_file(inputs_path, FLAGS.input_dim).astype(np.float64)
        if name != 'test':
            labels = read_binary_file(labels_path, FLAGS.output_dim).astype(np.float64)
        else:
            labels = None
        if apply_cmvn:
            if name != 'test':
                spk_id = utt_id[:3]
                cmvn = np.load(os.path.join(FLAGS.output_dir, "{}_cmvn.npz".format(spk_id)))
            else:
                cmvn = np.load(os.path.join(FLAGS.output_dir, "train_cmvn.npz"))
            inputs = (inputs - cmvn["mean_inputs"]) / cmvn["stddev_inputs"]
            if labels is not None:
                labels = (labels - cmvn["mean_labels"]) / cmvn["stddev_labels"]
        ex = make_sequence_example(inputs, labels)
        writer.write(ex.SerializeToString())
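The snippet depends on a project-local make_sequence_example helper. A minimal sketch of what it plausibly does, assuming one float feature per frame (the real project's proto layout may differ):

import tensorflow as tf

def make_sequence_example(inputs, labels):
    # Hypothetical reconstruction: pack each frame as a float Feature
    # inside a tf.train.SequenceExample feature list.
    input_features = [
        tf.train.Feature(float_list=tf.train.FloatList(value=row))
        for row in inputs]
    feature_list = {"inputs": tf.train.FeatureList(feature=input_features)}
    if labels is not None:
        label_features = [
            tf.train.Feature(float_list=tf.train.FloatList(value=row))
            for row in labels]
        feature_list["labels"] = tf.train.FeatureList(feature=label_features)
    return tf.train.SequenceExample(
        feature_lists=tf.train.FeatureLists(feature_list=feature_list))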
Example #2
def get_train_data(sequence_length=100):
    """ Prepare the input/output data for the Neural Network """

    network_input = list()
    network_output = list()
    notes = read_binary_file(str(data_dir / "notes.pkl"))

    # get all pitch names
    pitch_names = sorted(set(notes))
    # Embedding #TODO use keras Embedding layer instead
    note_to_int = read_binary_file(metadata_dir / "note_to_int.pkl")
    vocab_size = len(note_to_int)

    # create input sequences and the corresponding outputs
    for i in range(0, len(notes) - sequence_length, 1):
        sequence_in = notes[i:i + sequence_length]
        sequence_out = notes[i + sequence_length]
        network_input.append([note_to_int[char] for char in sequence_in])
        network_output.append(note_to_int[sequence_out])

    n_patterns = len(network_input)
    # reshape the input into a format compatible with LSTM layers
    network_input = np.reshape(network_input, (n_patterns, sequence_length, 1))
    # normalize input
    network_input = network_input / float(vocab_size)
    network_output = np_utils.to_categorical(network_output)

    with open(metadata_dir / "sequence_in.pkl", "wb") as f:
        pickle.dump(network_input, f)
    with open(metadata_dir / "sequence_out.pkl", "wb") as f:
        pickle.dump(network_output, f)
    return network_input, network_output, vocab_size
Example #3
def prepare_validation_data(files, sequence_length):
    """have to re-write"""
    notes = list()
    sequence_in = list()
    sequence_out = list()
    note_to_int = read_binary_file(metadata_dir / "note_to_int.pkl")

    for file_name in files:
        data = read_binary_file(file_name)
        notes.extend(data)

    for i in range(0, len(notes) - sequence_length, 1):
        sequence_in.append(notes[i:i + sequence_length])
        sequence_out.append(notes[i + sequence_length])
    sequence_in = [[note_to_int[note] for note in seq] for seq in sequence_in]
    sequence_out = [note_to_int[note] for note in sequence_out]

    sequence_in = np.reshape(sequence_in,
                             (len(sequence_in), len(sequence_in[0]), 1))
    sequence_in = sequence_in / float(len(note_to_int))
    sequence_out = np_utils.to_categorical(sequence_out,
                                           num_classes=len(note_to_int))
    return sequence_in, sequence_out
Example #4
def calculate_cmvn(file_list, spk_id):
    inputs_frame_count, labels_frame_count = 0, 0
    for line in file_list:
        utt_id, inputs_path, labels_path = line.strip().split()
        tf.logging.info("Reading utterance %s" % utt_id)
        inputs = read_binary_file(inputs_path, FLAGS.input_dim)
        labels = read_binary_file(labels_path, FLAGS.output_dim)
        if inputs_frame_count == 0:    # create numpy array for accumulating
            ex_inputs = np.sum(inputs, axis=0)
            ex2_inputs = np.sum(inputs**2, axis=0)
            ex_labels = np.sum(labels, axis=0)
            ex2_labels = np.sum(labels**2, axis=0)
        else:
            ex_inputs += np.sum(inputs, axis=0)
            ex2_inputs += np.sum(inputs**2, axis=0)
            ex_labels += np.sum(labels, axis=0)
            ex2_labels += np.sum(labels**2, axis=0)
        inputs_frame_count += len(inputs)
        labels_frame_count += len(labels)

    mean_inputs = ex_inputs / inputs_frame_count
    stddev_inputs = np.sqrt(ex2_inputs / inputs_frame_count - mean_inputs**2)
    stddev_inputs[stddev_inputs < 1e-20] = 1e-20

    mean_labels = ex_labels / labels_frame_count
    stddev_labels = np.sqrt(ex2_labels / labels_frame_count - mean_labels**2)
    stddev_labels[stddev_labels < 1e-20] = 1e-20

    cmvn_name = os.path.join(FLAGS.output_dir, spk_id + "_cmvn.npz")
    np.savez(cmvn_name,
             mean_inputs=mean_inputs,
             stddev_inputs=stddev_inputs,
             mean_labels=mean_labels,
             stddev_labels=stddev_labels)
    tf.logging.info("Wrote to %s" % cmvn_name)
Example #5
    def generate_sloth_input(self):

        img_data = utils.read_binary_file(self.image_filepath)
        tw_data  = utils.read_binary_file(self.tweets_filepath)
        
        img_tw_bytes = bytearray(img_data + tw_data)

        ret_val = utils.hash_sha512(img_tw_bytes)

        return ret_val
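utils.hash_sha512 is project-specific; a minimal stand-in, assuming it returns a hex digest of the concatenated bytes:

import hashlib

def hash_sha512(data):
    # Hypothetical helper: SHA-512 hex digest of a bytes-like object.
    return hashlib.sha512(data).hexdigest()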
Example #6
def read_data(args, raw):
    label_dir = os.path.join(cur_file, raw, 'prepared_label')
    cmp_dir = os.path.join(cur_file, raw, 'prepared_cmp')

    if os.path.exists(label_dir) and os.path.exists(cmp_dir):
        logger.info('Raw data has been prepared.')
        return

    if not os.path.exists(label_dir):
        os.mkdir(label_dir)
    if not os.path.exists(cmp_dir):
        os.mkdir(cmp_dir)

    label_files = os.listdir(args.label_dir)
    cmp_files = os.listdir(args.cmp_dir)

    # Do frame alignment
    for line in label_files:
        filename, _ = os.path.splitext(line.strip())
        logger.info('processing ' + filename)
        sys.stdout.flush()

        label_mat = np.loadtxt(os.path.join(args.label_dir, filename + '.lab'))
        if args.model_type == 'acoustic':
            cmp_mat = read_binary_file(os.path.join(args.cmp_dir,
                                                    filename + ".cmp"),
                                       dimension=hparams['target_channels'],
                                       dtype=np.float64)
        elif args.model_type == 'acoustic_mgc':
            cmp_mat = read_binary_file(
                os.path.join(args.cmp_dir, filename + ".cmp"),
                dimension=hparams['mgc_target_channels'],
                dtype=np.float32)

        if label_mat.shape[0] <= cmp_mat.shape[0]:
            cmp_mat = cmp_mat[:label_mat.shape[0], :]
        else:
            frame_diff = label_mat.shape[0] - cmp_mat.shape[0]
            rep = np.repeat(cmp_mat[-1:, :], frame_diff, axis=0)
            cmp_mat = np.concatenate([cmp_mat, rep], axis=0)

        write_binary_file(label_mat, os.path.join(label_dir,
                                                  filename + '.lab'))
        if args.model_type == 'acoustic':
            write_binary_file(cmp_mat,
                              os.path.join(cmp_dir, filename + '.cmp'),
                              dtype=np.float64)
        elif args.model_type == 'acoustic_mgc':
            write_binary_file(cmp_mat,
                              os.path.join(cmp_dir, filename + '.cmp'),
                              dtype=np.float32)
Example #7
def scale_pdf(filedata):
    """
    Scales the given PDF filedata down and returns the compressed PDF data.
    """
    # If there are more than 50 pages, it's going to take forever to scale -
    # don't even bother trying.
    pagecount = utils.pdf_count_pages(filedata)
    if pagecount > 50:
        al.error("Abandon PDF scaling - has > 50 pages (%s found)" % pagecount,
                 "media.scale_pdf")
        return filedata
    inputfile = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
    outputfile = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
    inputfile.write(filedata)
    inputfile.flush()
    inputfile.close()
    outputfile.close()
    # If something went wrong during the scaling, use the original data
    if not scale_pdf_file(inputfile.name, outputfile.name):
        return filedata
    compressed = utils.read_binary_file(outputfile.name)
    os.unlink(inputfile.name)
    os.unlink(outputfile.name)
    # If something has gone wrong and the scaled one has no size, return the original
    if len(compressed) == 0:
        return filedata
    # If the original is smaller than the scaled one, return the original
    if len(compressed) > len(filedata):
        return filedata
    return compressed
Example #8
def scale_all_animal_images(dbo):
    """
    Goes through all animal images in the database and scales
    them to the current incoming media scaling factor.
    """
    mp = dbo.query("SELECT ID, MediaName FROM media WHERE MediaMimeType = 'image/jpeg' AND LinkTypeID = 0")
    for i, m in enumerate(mp):
        filepath = dbo.query_string("SELECT Path FROM dbfs WHERE Name = ?", [m.MEDIANAME])
        name = str(m.MEDIANAME)
        inputfile = tempfile.NamedTemporaryFile(suffix=".jpg", delete=False)
        outputfile = tempfile.NamedTemporaryFile(suffix=".jpg", delete=False)
        odata = dbfs.get_string(dbo, name)
        inputfile.write(odata)
        inputfile.flush()
        inputfile.close()
        outputfile.close()
        al.debug("scaling %s (%d of %d)" % (name, i, len(mp)), "media.scale_all_animal_images", dbo)
        try:
            scale_image_file(inputfile.name, outputfile.name, configuration.incoming_media_scaling(dbo))
        except Exception as err:
            al.error("failed scaling image, doing nothing: %s" % err, "media.scale_all_animal_images", dbo)
            continue
        data = utils.read_binary_file(outputfile.name)
        os.unlink(inputfile.name)
        os.unlink(outputfile.name)
        # Update the image file data
        dbfs.put_string(dbo, name, filepath, data)
        dbo.update("media", m.ID, { "MediaSize": len(data) })
    al.debug("scaled %d images" % len(mp), "media.scale_all_animal_images", dbo)
Example #9
def check_saved_embedding_variables(args,
                                    embedding_variable_name,
                                    use_hashtable=True,
                                    gpu_num=None,
                                    atol=1e-4,
                                    rtol=1e-4):
    filepath = r"./embedding_variables"

    sok_keys_filename = os.path.join(filepath,
                                     embedding_variable_name + r"_keys.file")
    element_type = "long long"
    if hasattr(args, "key_dtype"):
        element_type = "long long" if args.key_dtype == "int64" else "unsigned int"
    sok_keys = utils.read_binary_file(sok_keys_filename,
                                      element_type=element_type)
    sok_values_filename = os.path.join(
        filepath, embedding_variable_name + r"_values.file")
    sok_values = utils.read_binary_file(sok_values_filename,
                                        element_type="float")

    sorted_sok_keys, sorted_sok_values = utils.sort_embedding_variables_by_key(
        sok_keys,
        sok_values,
        embedding_vec_size=args.embedding_vec_size,
        use_hashtable=use_hashtable,
        gpu_num=gpu_num)

    tf_values_filename = os.path.join(filepath, r"tf_variable.file")
    tf_values = utils.restore_from_file(tf_values_filename)
    valid_tf_values = utils.get_valid_tf_values(sorted_sok_keys, tf_values[0])

    import numpy as np
    sorted_sok_values = np.reshape(sorted_sok_values,
                                   newshape=(sorted_sok_keys.size,
                                             args.embedding_vec_size))
    allclose = np.allclose(sorted_sok_values,
                           valid_tf_values,
                           atol=atol,
                           rtol=rtol)
    if not allclose:
        raise ValueError(
            f"The Variable from SOK: \n{sorted_sok_values}, \nis not near to that from TF: \n{valid_tf_values}"
            f" \n at atol: {atol}, rtol: {rtol}")
    print(
        "[INFO]: the saved parameters are consistent between sparse operation kit and TensorFlow"
    )
Example #10
def check_saved_embedding_variables(args,
                                    embedding_variable_names,
                                    use_hashtable=True,
                                    gpu_num=None,
                                    atol=1e-4,
                                    rtol=1e-4):
    filepath = r"./embedding_variables"
    for i, embedding_variable_name in enumerate(embedding_variable_names):
        sok_keys_filename = os.path.join(
            filepath, embedding_variable_name + r"_keys.file")
        element_type = "long long"
        if hasattr(args, "key_dtype"):
            element_type = "long long" if args.key_dtype == "int64" else "unsigned int"
        sok_keys = utils.read_binary_file(sok_keys_filename,
                                          element_type=element_type)
        sok_values_filename = os.path.join(
            filepath, embedding_variable_name + r"_values.file")
        sok_values = utils.read_binary_file(sok_values_filename,
                                            element_type="float")

        sorted_sok_keys, sorted_sok_values = utils.sort_embedding_variables_by_key(
            sok_keys,
            sok_values,
            embedding_vec_size=args.embedding_vec_size[i],
            use_hashtable=use_hashtable,
            gpu_num=gpu_num)

        tf_values_filename = os.path.join(filepath,
                                          r"tf_variable_" + str(i) + r".file")
        tf_values = utils.restore_from_file(tf_values_filename)
        valid_tf_values = utils.get_valid_tf_values(sorted_sok_keys,
                                                    tf_values[0])

        vec_size = args.embedding_vec_size[i]
        newshape = tuple([sorted_sok_keys.size, vec_size])
        sorted_sok_values = np.reshape(sorted_sok_values, newshape=newshape)
        allclose = np.allclose(sorted_sok_values,
                               valid_tf_values,
                               atol=atol,
                               rtol=rtol)
        if not allclose:
            raise ValueError(
                f"\n{sorted_sok_values} \nis not near to \n{valid_tf_values} "
                f"\nat rotl={rtol}, atol={atol}")
    print(
        "[INFO]: the saved parameters are consistent between sparse operation kit and TensorFlow"
    )
Example #11
def put_file(dbo, name, path, filepath):
    """
    Reads the file from filepath and stores it with name/path
    """
    check_create_path(dbo, path)
    s = utils.read_binary_file(filepath)
    dbfsid = dbo.insert("dbfs", {"Name": name, "Path": path})
    o = DBFSStorage(dbo)
    o.put(dbfsid, name, s)
    return dbfsid
Example #12
def prepare_normalizer(list_paths, dim):
    dataset = []
    for file_path in list_paths:
        try:
            data, _ = read_binary_file(file_path, dim)
            dataset.append(data)
        except FileNotFoundError as err:
            print(err)
    dataset = np.concatenate(dataset)
    scaler = StandardScaler().fit(dataset)
    del dataset
    return scaler
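A hypothetical call, assuming 40-dimensional features (the paths below are placeholders):

scaler = prepare_normalizer(["feats/utt1.mgc", "feats/utt2.mgc"], dim=40)
feats, _ = read_binary_file("feats/utt3.mgc", 40)
feats = scaler.transform(feats)  # per-dimension zero mean, unit variance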
Example #13
def calculate_cmvn(name):
    """Calculate mean and var."""
    tf.logging.info("Calculating mean and var of %s" % name)
    config_filename = open(os.path.join(FLAGS.config_dir, name + '.lst'))

    inputs_frame_count, labels_frame_count = 0, 0
    for line in config_filename:
        utt_id, inputs_path, labels_path = line.strip().split()
        tf.logging.info("Reading utterance %s" % utt_id)
        inputs = read_binary_file(inputs_path, FLAGS.input_dim)
        labels = read_binary_file(labels_path, FLAGS.output_dim)
        if inputs_frame_count == 0:  # create numpy array for accumulating
            ex_inputs = np.sum(inputs, axis=0)
            ex2_inputs = np.sum(inputs**2, axis=0)
            ex_labels = np.sum(labels, axis=0)
            ex2_labels = np.sum(labels**2, axis=0)
        else:
            ex_inputs += np.sum(inputs, axis=0)
            ex2_inputs += np.sum(inputs**2, axis=0)
            ex_labels += np.sum(labels, axis=0)
            ex2_labels += np.sum(labels**2, axis=0)
        inputs_frame_count += len(inputs)
        labels_frame_count += len(labels)

    mean_inputs = ex_inputs / inputs_frame_count
    stddev_inputs = np.sqrt(ex2_inputs / inputs_frame_count - mean_inputs**2)
    stddev_inputs[stddev_inputs < 1e-20] = 1e-20

    mean_labels = ex_labels / labels_frame_count
    stddev_labels = np.sqrt(ex2_labels / labels_frame_count - mean_labels**2)
    stddev_labels[stddev_labels < 1e-20] = 1e-20

    cmvn_name = os.path.join(FLAGS.output_dir, name + "_cmvn.npz")
    np.savez(cmvn_name,
             mean_inputs=mean_inputs,
             stddev_inputs=stddev_inputs,
             mean_labels=mean_labels,
             stddev_labels=stddev_labels)
    config_filename.close()
    tf.logging.info("Wrote to %s" % cmvn_name)
Example #14
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--cmp_dir', default='')
    parser.add_argument('--output',
                        default='./splited_cmp/',
                        type=str,
                        help='path to output cmp')
    parser.add_argument('--model_type', default='')

    args = parser.parse_args()
    logging.basicConfig(
        format='%(asctime)s %(filename)s %(levelname)s %(message)s',
        datefmt='%a, %d %b %Y %H:%M:%S',
        level=logging.DEBUG,
        stream=sys.stdout)
    if not os.path.exists(args.output):
        os.mkdir(args.output)

    cmp_file = os.listdir(args.cmp_dir)
    if args.model_type == 'acoustic':
        for cmp_filename in cmp_file:
            cmp = read_binary_file(os.path.join(args.cmp_dir, cmp_filename),
                                   dimension=hparams['target_channels'],
                                   dtype=np.float64)
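            # The cmp columns appear to be laid out as
            # [spec | lf0 | uv | cap | energy]; the slices below pull
            # each stream out by its hparams unit count.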
            sp = np.zeros(cmp.shape)
            sp[:, :hparams['spec_units']] = cmp[:, :hparams['spec_units']]
            sp[:, -hparams['energy_units']:] = cmp[:, -hparams['energy_units']:]
            lf0 = cmp[:, hparams['spec_units']:hparams['spec_units'] +
                      hparams['lf0_units']]
            uv = cmp[:, hparams['spec_units'] +
                     hparams['lf0_units']:hparams['spec_units'] +
                     hparams['lf0_units'] + hparams['uv_units']]
            cap = cmp[:, hparams['spec_units'] + hparams['lf0_units'] +
                      hparams['uv_units']:hparams['cap_units'] +
                      hparams['spec_units'] + hparams['lf0_units'] +
                      hparams['uv_units']]
            lf0[uv == 0] = 0
            write_binary_file(sp,
                              os.path.join(
                                  args.output,
                                  os.path.splitext(cmp_filename)[0] + '.sp'),
                              dtype=np.float64)
            write_binary_file(lf0,
                              os.path.join(
                                  args.output,
                                  os.path.splitext(cmp_filename)[0] + '.lf0'),
                              dtype=np.float64)
            write_binary_file(cap,
                              os.path.join(
                                  args.output,
                                  os.path.splitext(cmp_filename)[0] + '.ap'),
                              dtype=np.float64)
Example #15
def test(netG, opt):
    assert opt.netG != ''
    test_dir = opt.testdata_dir
    for f in os.listdir(test_dir):
        fname, ext = os.path.splitext(f)
        if ext == '.cmp':
            print(fname)
            cmp_file = os.path.join(test_dir, f)
            ac_data = read_binary_file(cmp_file, dim=47)
            ac_data = torch.FloatTensor(ac_data)
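            # nz is assumed to be a module-level constant giving the
            # latent noise dimension; it is not defined in this snippet.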
            noise = torch.FloatTensor(ac_data.size(0), nz)
            if opt.cuda:
                ac_data, noise = ac_data.cuda(), noise.cuda()
            ac_data = Variable(ac_data)
            noise = Variable(noise)
            noise.data.normal_(0, 1)
            generated_pulses = netG(noise, ac_data)
            generated_pulses = generated_pulses.data.cpu().numpy()
            generated_pulses = generated_pulses.reshape(ac_data.size(0), -1)
            out_file = os.path.join(test_dir, fname + '.pls')
            with open(out_file, 'wb') as fid:
                generated_pulses.tofile(fid)
Example #16
    def read_binary_file(self, file_subpath):
        return read_binary_file(self.to_full_path(file_subpath))
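A minimal sketch of the module-level read_binary_file this wrapper delegates to, assuming it simply returns the file's raw bytes:

def read_binary_file(path):
    with open(path, "rb") as f:
        return f.read()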
Example #17
def get_image_file_data(dbo, mode, iid, seq=0, justdate=False):
    """
    Gets an image
    mode: animal | media | animalthumb | person | personthumb | dbfs
    iid: The id of the animal for animal/thumb mode or the media record
        or a template path for dbfs mode
    seq: If the mode is animal or person, returns image X for that person/animal
         The first image is always the preferred photo and seq is 1-based.
    if justdate is True, returns the last modified date
    if justdate is False, returns a tuple containing the last modified date and image data
    """
    def nopic():
        NOPIC_DATE = datetime.datetime(2011, 1, 1)
        if justdate: return NOPIC_DATE
        return (NOPIC_DATE, "NOPIC")

    def thumb_nopic():
        NOPIC_DATE = datetime.datetime(2011, 1, 1)
        if justdate: return NOPIC_DATE
        return (NOPIC_DATE, "NOPIC")

    def mrec(mm):
        if len(mm) == 0: return nopic()
        if justdate: return mm[0].DATE
        return (mm[0].DATE, dbfs.get_string(dbo, mm[0].MEDIANAME))

    def thumb_mrec(mm):
        if len(mm) == 0: return thumb_nopic()
        if justdate: return mm[0].DATE
        return (mm[0].DATE,
                scale_thumbnail(dbfs.get_string(dbo, mm[0].MEDIANAME)))

    if mode == "animal":
        if seq == 0:
            return mrec(get_web_preferred(dbo, ANIMAL, int(iid)))
        else:
            return mrec(get_media_by_seq(dbo, ANIMAL, int(iid), seq))

    elif mode == "person":
        if seq == 0:
            return mrec(get_web_preferred(dbo, PERSON, int(iid)))
        else:
            return mrec(get_media_by_seq(dbo, PERSON, int(iid), seq))

    elif mode == "animalthumb":
        return thumb_mrec(get_web_preferred(dbo, ANIMAL, int(iid)))

    elif mode == "personthumb":
        return thumb_mrec(get_web_preferred(dbo, PERSON, int(iid)))

    elif mode == "media":
        return mrec(get_media_by_id(dbo, int(iid)))

    elif mode == "dbfs":
        if justdate:
            return dbo.now()
        else:
            if str(iid).startswith("/"):
                # Complete path was given
                return (dbo.now(), dbfs.get_string_filepath(dbo, str(iid)))
            else:
                # Only name was given
                return (dbo.now(), dbfs.get_string(dbo, str(iid)))

    elif mode == "nopic":
        if dbfs.file_exists(dbo, "nopic.jpg"):
            return (dbo.now(),
                    dbfs.get_string_filepath(dbo, "/reports/nopic.jpg"))
        else:
            return (dbo.now(),
                    utils.read_binary_file(dbo.installpath +
                                           "media/reports/nopic.jpg"))

    else:
        return nopic()
Example #18
def decode(args, model, device):
    model.eval()
    data_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                            'data_' + args.name)
    config_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                              'config_' + args.name)
    data_list = open(os.path.join(config_dir, 'test.lst'), 'r').readlines()
    cmvn = np.load(os.path.join(data_dir, "train_cmvn.npz"))
    if not os.path.exists(args.output):
        os.mkdir(args.output)

    if args.model_type == 'acoustic':
        for input_name in data_list:
            input_name = input_name.split(' ')[0] + '.lab'
            logging.info(f'decode {input_name} ...')
            input = read_binary_file(os.path.join(
                os.path.join(data_dir, 'test', 'label'), input_name),
                                     dimension=hparams['in_channels'])
            input = torch.from_numpy(input).to(device)
            input = input.unsqueeze(0)
            output, uv_output = model(input)
            output = output.squeeze()
            uv_output = F.softmax(uv_output, dim=-1)[:, :, 0]
            uv_output = uv_output.squeeze()
            uv = torch.ones(uv_output.shape).to(device)
            uv[uv_output > 0.5] = 0.0
            uv = uv.unsqueeze(-1)
            output = torch.cat((uv, output), -1)
            output = output.cpu().squeeze().detach().numpy()
            uv = uv.cpu().squeeze().detach().numpy()
            output = output * cmvn['stddev_labels'] + cmvn["mean_labels"]

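            # The de-normalized output appears to be laid out as
            # [uv | cap | energy | spec | lf0]; the slices below regroup
            # these columns into sp / lf0 / cap streams.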
            cap = output[:, 1:hparams['cap_units'] + 1]
            sp = np.concatenate(
                (output[:, hparams['cap_units'] + hparams['energy_units'] +
                        1:hparams['cap_units'] + hparams['energy_units'] +
                        hparams['spec_units'] + 1],
                 output[:, hparams['cap_units'] + 1:hparams['cap_units'] +
                        hparams['energy_units'] + 1]),
                axis=-1)
            lf0 = output[:, hparams['cap_units'] + hparams['energy_units'] +
                         hparams['spec_units'] + 1:hparams['cap_units'] +
                         hparams['energy_units'] + hparams['spec_units'] +
                         hparams['lf0_units'] + 1]
            lf0[uv == 0] = -1.0e+10
            write_binary_file(sp,
                              os.path.join(
                                  args.output,
                                  os.path.splitext(input_name)[0] + '.sp'),
                              dtype=np.float64)
            write_binary_file(lf0,
                              os.path.join(
                                  args.output,
                                  os.path.splitext(input_name)[0] + '.lf0'),
                              dtype=np.float32)
            write_binary_file(cap,
                              os.path.join(
                                  args.output,
                                  os.path.splitext(input_name)[0] + '.ap'),
                              dtype=np.float64)
    elif args.model_type == 'acoustic_mgc':
        for input_name in data_list:
            input_name = input_name.split(' ')[0] + '.lab'
            logging.info(f'decode {input_name} ...')
            input = read_binary_file(os.path.join(
                os.path.join(data_dir, 'test', 'label'), input_name),
                                     dimension=hparams['in_channels'])
            input = torch.from_numpy(input).to(device)
            input = input.unsqueeze(0)
            output, uv_output = model(input)
            output = output.squeeze()
            uv_output = F.softmax(uv_output, dim=-1)[:, :, 0]
            uv_output = uv_output.squeeze()
            uv = torch.ones(uv_output.shape).to(device)
            uv[uv_output > 0.5] = 0.0
            uv = uv.unsqueeze(-1)
            output = torch.cat(
                (output[:, :hparams['mgc_units']], uv,
                 output[:, -(hparams['bap_units'] + hparams['lf0_units']):]),
                -1)
            output = output.cpu().squeeze().detach().numpy()
            uv = uv.cpu().squeeze().detach().numpy()
            output = output * cmvn['stddev_labels'] + cmvn["mean_labels"]

            mgc = output[:, :hparams['mgc_units']]
            lf0 = output[:, hparams['mgc_units'] + 1:hparams['mgc_units'] +
                         hparams['lf0_units'] + 1]
            bap = output[:, -(hparams['bap_units']):]
            write_binary_file(
                mgc,
                os.path.join(args.output,
                             os.path.splitext(input_name)[0] + '.mgc'))
            write_binary_file(
                lf0,
                os.path.join(args.output,
                             os.path.splitext(input_name)[0] + '.lf0'))
            write_binary_file(
                bap,
                os.path.join(args.output,
                             os.path.splitext(input_name)[0] + '.bap'))
Example #19

with open('gen_files.list', 'r') as fid:
    gen_files_list = [l.strip() for l in fid.readlines()]

with open('ref_files.list', 'r') as fid:
    ref_files_list = [l.strip() for l in fid.readlines()]

gen_normalizer = prepare_normalizer(gen_files_list, dim=40)
ref_normalizer = prepare_normalizer(ref_files_list, dim=40)
print(gen_normalizer.mean_)
assert len(list(syn_dir_path.glob('*.mgc'))) > 0
for file in syn_dir_path.glob('*.mgc'):
    print(file)

    syn_mgc_ori, _ = read_binary_file(file, dim=40)
    rnd = random.randint(0, syn_mgc_ori.shape[0] - 40)
    print(rnd)

    syn_mgc = gen_normalizer.transform(syn_mgc_ori)
    syn_mgc = syn_mgc.T
    print(syn_mgc.shape)
    syn_mgc = syn_mgc.reshape(1, 1, syn_mgc.shape[0], syn_mgc.shape[1])

    nat_mgc, _ = read_binary_file(nat_dir_path.joinpath(file.name), dim=40)
    # nat_mgc = ref_normalizer.transform(nat_mgc)
    # nat_sp = mgc2sp(nat_mgc.astype('float64'), alpha=0.42, gamma=0.0, fftlen=1024).T
    if 0:
        np.savetxt(file.name + 'a', nat_mgc.flatten())
    nat_mgc = nat_mgc.T
    print(nat_mgc.shape)
Example #20
    def get(self, dbfsid, url):
        """ Returns the file data for url """
        filepath = "%s/%s/%s" % (DBFS_FILESTORAGE_FOLDER, self.dbo.database,
                                 url.replace("file:", ""))
        return utils.read_binary_file(filepath)