示例#1
0
# Every command-line argument from position 5 onward is treated as a dataset
# directory; the file names below are joined onto each of them.
dataset_list = sys.argv[5:]
dataset_num = len(dataset_list)

dis_phone_num = 4  # forwarded to dataset.Input_transform below

# load data
# The index-based loop is kept on purpose so that `dataset_indx` remains
# available to any code that follows this section.
for dataset_indx in range(dataset_num):
    # Initialise the dist class (dataset.Input_transform) for this training
    # dataset. `test_dist_path` and `prob_trans_path` are not assigned in
    # this section; they must already be defined before the loop runs.
    train_dist_path = os.path.join(dataset_list[dataset_indx],
                                   "subivector_dist.ark")
    dist_trans = dataset.Input_transform(train_dist_path, test_dist_path,
                                         dis_phone_num, prob_trans_path)

    ds_path = os.path.join(dataset_list[dataset_indx], "subivector.scp")
    ds = dataset.SubivectorDataset(ds_path)

    # Use the built-in len() rather than calling the dunder method directly.
    utt_num = len(ds)
    print("The number of utters in %s is  %d." % (ds_path, utt_num))

    # Sizes are read from the first item of the dataset only: axis 1 is
    # taken as the subivector dimension and axis 0 as the phone count.
    subivector_dim = np.size(ds.data_list[0], 1)
    phone_num = np.size(ds.data_list[0], 0)

    # Wrap the dataset in a data loader; it is iterated without shuffling.
    ds_loader = DataLoader(
        ds,
        batch_size=batchSize,  # batchSize is defined outside this section
        shuffle=False,
        num_workers=1)

    # set hook and store output in csvector
示例#2
0
文件: main.py 项目: njzheng/LCLDA
    exit()

# (Hyper parameters)
batchSize = 128  # mini-batch size
niter = 500  # maximum number of epochs

# load data
# The four .scp paths are taken from the command line. The quoted paths in
# the trailing comments are the example values left by the original author.
train_path = sys.argv[
    1]  #'/scratch/njzheng/myprogram/phone_vectors5/subivector.scp'
sre_test_path = sys.argv[
    2]  #'/scratch/njzheng/myprogram/sre_test/subivector_test.scp'
sre_train_path = sys.argv[
    3]  #'/scratch/njzheng/myprogram/sre_train/subivector_test.scp'
sre_path = sys.argv[4]  #'/scratch/njzheng/myprogram/sre/subivector_test.scp'

# One SubivectorDataset per path, built in the same order as the arguments.
train_dataset = dataset.SubivectorDataset(train_path)
sre_test_dataset = dataset.SubivectorDataset(sre_test_path)
sre_train_dataset = dataset.SubivectorDataset(sre_train_path)
sre_dataset = dataset.SubivectorDataset(sre_path)

# Split the training set 90% / 10% into training and validation subsets.
# valid_len is computed as the remainder so the two lengths always sum to
# utt_num, which random_split requires.
utt_num = len(train_dataset)
train_len = int(0.9 * utt_num)
valid_len = utt_num - train_len
train, valid = torch.utils.data.random_split(train_dataset,
                                             lengths=[train_len, valid_len])

# Sizes are read from the first item of the training set only: axis 0 is
# taken as the subivector dimension and axis 1 as the phone count.
# NOTE(review): confirm this axis order against the layout of data_list items.
subivector_dim = np.size(train_dataset.data_list[0], 0)
phone_num = np.size(train_dataset.data_list[0], 1)
# load spk2int list
uttid2int_dic_path = sys.argv[
示例#3
0
phone_num = 43
subnet_out_dim = 50
# Square root of the sub-network output dimension, as a float tensor.
out_dim_sqrt = torch.tensor(subnet_out_dim).float().sqrt()

subnet_list = []
# Need loop ==============================================================
# NOTE(review): the loop starts at phone index 34 and runs through phone_num
# inclusive, so indices below 34 are never visited here - confirm that this
# starting point is intended.
for phone_indx in range(34, phone_num + 1):
    # Per-phone network file name: the third command-line argument with
    # ".<phone_indx>" appended.
    dnn_name = sys.argv[3] + "." + str(phone_indx)  # $sub_train_path/net.pkl

    # `train_path` is not assigned in this section; it must already be
    # defined before the loop runs.
    # train_scp_path = os.path.join(train_path,"subivector.scp")
    train_scp_path = os.path.join(train_path, "phone_vector",
                                  "triplet_data." + str(phone_indx) + ".scp")

    print('scp file is: %s' % (train_scp_path))

    train_dataset = dataset.SubivectorDataset(train_scp_path)
    # Axis 1 of the first item is taken as the subivector dimension.
    subivector_dim = np.size(train_dataset.data_list[0], 1)
    in_dim_sqrt = torch.tensor(subivector_dim).float().sqrt()

    # Use the built-in len() rather than calling the dunder method directly.
    train_utt_num = len(train_dataset)

    # Split 90% / 10% into training and validation subsets. valid_len is the
    # remainder so the two lengths always sum to train_utt_num.
    train_len = int(0.9 * train_utt_num)
    valid_len = train_utt_num - train_len
    train, valid = torch.utils.data.random_split(
        train_dataset, lengths=[train_len, valid_len])

    print('The number of training and valid utters are  %d  %d' %
          (train_len, valid_len))
    print('The subivector_dim is %d, phone_indx is %d' %
          (subivector_dim, phone_indx))