Пример #1
0
def get_exp_path(system_name, args, override=False):
  """ Resolve the experiment folder of a TIDIGITS system and the files in it.

  The folder is `get_exppath` applied to the tag
  'TIDIGITS_<system_name>_<args.task>_<args.feat>', extended with '_<nmix>'
  and then '_<tdim>' whenever `args` contains those entries.
  The folder is created if it does not exist yet.

  Parameters
  ----------
  system_name :
    value formatted (with %s) into the folder tag
  args :
    parsed arguments; `task` and `feat` are read as attributes, `nmix` and
    `tdim` are read only when `'nmix' in args` / `'tdim' in args` holds
  override :
    if truthy, an already existing folder is removed (with all of its
    content) before being created again

  Return
  ------
  exp_dir, model_path, log_path
  """
  tag = 'TIDIGITS_%s_%s_%s' % (system_name, args.task, args.feat)
  exp_dir = get_exppath(tag=tag)
  # optional integer suffixes, always appended in the order nmix -> tdim
  for name in ('nmix', 'tdim'):
    if name in args:
      exp_dir += '_%d' % getattr(args, name)
  # ====== check override ====== #
  if override and os.path.exists(exp_dir):
    shutil.rmtree(exp_dir)
  if not os.path.exists(exp_dir):
    os.mkdir(exp_dir)
  # ====== basic paths ====== #
  model_path = os.path.join(exp_dir, 'model.ai')
  # the log file name carries the current date-time
  timestamp = get_formatted_datetime(only_number=True)
  log_path = os.path.join(exp_dir, 'log_%s.txt' % timestamp)
  # ====== report the resolved paths ====== #
  for label, path in (("Exp dir:", exp_dir),
                      ("Model path:", model_path),
                      ("Log path:", log_path)):
    print(label, ctext(path, 'cyan'))
  return exp_dir, model_path, log_path
Пример #2
0
def get_exp_path(system_name, args, override=False):
    """ Resolve the experiment folder of a TIDIGITS system and the files in it.

    The folder is `get_exppath` applied to the tag
    'TIDIGITS_<system_name>_<args.task>_<args.feat>', extended with '_<nmix>'
    and then '_<tdim>' whenever `args` contains those entries.
    The folder is created if it does not exist yet.

    Parameters
    ----------
    system_name :
        value formatted (with %s) into the folder tag
    args :
        parsed arguments; `task` and `feat` are read as attributes, `nmix`
        and `tdim` are read only when `'nmix' in args` / `'tdim' in args`
        holds
    override :
        if truthy, an already existing folder is removed (with all of its
        content) before being created again

    Return
    ------
    exp_dir, model_path, log_path
    """
    tag = 'TIDIGITS_%s_%s_%s' % (system_name, args.task, args.feat)
    exp_dir = get_exppath(tag=tag)
    # optional integer suffixes, always appended in the order nmix -> tdim
    for name in ('nmix', 'tdim'):
        if name in args:
            exp_dir += '_%d' % getattr(args, name)
    # ====== check override ====== #
    if override and os.path.exists(exp_dir):
        shutil.rmtree(exp_dir)
    if not os.path.exists(exp_dir):
        os.mkdir(exp_dir)
    # ====== basic paths ====== #
    model_path = os.path.join(exp_dir, 'model.ai')
    # the log file name carries the current date-time
    timestamp = get_formatted_datetime(only_number=True)
    log_path = os.path.join(exp_dir, 'log_%s.txt' % timestamp)
    # ====== report the resolved paths ====== #
    for label, path in (("Exp dir:", exp_dir),
                        ("Model path:", model_path),
                        ("Log path:", log_path)):
        print(label, ctext(path, 'cyan'))
    return exp_dir, model_path, log_path
Пример #3
0
from scipy.io import savemat

from odin import fuel as F, visual as V
from odin.utils import ctext, Progbar, get_exppath, select_path
from odin.stats import train_valid_test_split, sampling_iter
from odin.preprocessing.signal import segment_axis

# home directory of the current user; the last two candidates below are
# built from it
HOME_PATH = os.path.expanduser('~')
# path to the 'voxceleb1_wav' folder, picked by `select_path` among the
# candidate locations listed here
# NOTE(review): presumably the first existing candidate wins and nothing is
# created because of create_new=False - confirm in odin.utils.select_path
PATH_TO_WAV = select_path('/media/data2/SRE_DATA/voxceleb',
                          '/mnt/sdb1/SRE_DATA/voxceleb',
                          os.path.join(HOME_PATH, 'data', 'voxceleb'),
                          os.path.join(HOME_PATH, 'voxceleb'),
                          create_new=False)
# path to the folder that contains the experiment results
PATH_EXP = get_exppath('voxceleb')
# output directory for the acoustic features (created here if missing)
PATH_ACOUSTIC_FEAT = os.path.join(PATH_EXP, 'voxceleb_feat')
if not os.path.exists(PATH_ACOUSTIC_FEAT):
    os.mkdir(PATH_ACOUSTIC_FEAT)
# ====== remove '_quarter' if you want the full training data ====== #
# FILE_LIST is the key used to index `ds` further down
FILE_LIST = "voxceleb_files_quarter"
TRAIN_LIST = "voxceleb_sys_train_with_labels_quarter"
TRIAL_LIST = "voxceleb_trials"
# ====== Load the file list ====== #
ds = F.load_voxceleb_list()
WAV_FILES = {}  # dictionary mapping 'file_path' -> 'file_name'
for path, channel, name in ds[FILE_LIST]:
    path = os.path.join(PATH_TO_WAV, path)
    # validate that every listed file exists
    assert os.path.exists(path), path
Пример #4
0
from odin.stats import train_valid_test_split, sampling_iter

# Supported label types mapped to integer ids. The id of a label is its
# position in the tuple below, so the order of the names must not change.
_support_label = {
    label: index
    for index, label in enumerate((
        'other',
        'gender',
        'age',
        'dialect',
        'speaker',
        'production',
        'digit',
    ))
}

# ===========================================================================
# Const for path and features configuration
# ===========================================================================
# root folder of the TIDIGITS experiment, resolved by `get_exppath`
# NOTE(review): override=False presumably keeps an existing folder untouched -
# confirm in odin.utils.get_exppath
PATH_EXP = get_exppath(tag='TIDIGITS', override=False)
# ====== acoustic feature extraction ====== #
# location of the extracted features inside PATH_EXP; this statement only
# builds the path string, nothing is created on disk here
PATH_ACOUSTIC = os.path.join(PATH_EXP, 'TIDIGITS_feat')


class FeatureConfigs(object):
    """Constants grouped as plain class attributes.

    NOTE(review): these look like the settings of the acoustic feature
    extraction; the meanings noted beside the attributes are inferred from
    their names only - confirm them against the code that reads this class.
    """
    padding = False
    sr = 8000  # presumably the sample rate in Hz
    window = 'hamm'  # presumably the name of the analysis window
    n_fft = 512
    n_mels = 40
    n_ceps = 40
    fmin = 100  # fmin/fmax: presumably the frequency range in Hz
    fmax = 4000
    frame_length = 0.025  # presumably seconds (i.e. 25 ms)
    step_length = 0.010  # presumably seconds (i.e. 10 ms)
Пример #5
0
            '--stat', "Force re-extraction of centered statistics",
            False).add('--tmat', "Force re-run training Tmatrix", False).add(
                '--ivec', "Force re-run extraction of i-vector", False).add(
                    '--all', "Run all the system again, just a shortcut",
                    False).add('--acous',
                               "Force re-run acoustic feature extraction",
                               False).parse()
# Propagate the "force re-run" flags along the chain gmm -> stat -> tmat ->
# ivec: setting one flag also sets every flag after it, and `all` sets all
# four of them (`acous` is not modified here).
args.gmm |= args.all
args.stat |= args.all | args.gmm
args.tmat |= args.all | args.stat
args.ivec |= args.all | args.tmat
FEAT = args.feat
# ===========================================================================
# Const
# ===========================================================================
# root folder of the FSDD experiment and the features location inside it
EXP_DIR = get_exppath('FSDD')
PATH_ACOUSTIC_FEATURES = os.path.join(EXP_DIR, 'features')
# ====== GMM training ====== #
NMIX = args.nmix
GMM_NITER = 12
GMM_DOWNSAMPLE = 1
GMM_STOCHASTIC = True
GMM_DTYPE = 'float64'
# ====== IVEC training ====== #
TV_DIM = args.tdim
TV_NITER = 16
TV_DTYPE = 'float64'
# ===========================================================================
# Extract acoustic features
# ===========================================================================
# path to preprocessed dataset
Пример #6
0
from __future__ import print_function, division, absolute_import
import os
os.environ['ODIN'] = 'gpu,float32'
import shutil

import numpy as np
import tensorflow as tf

from odin import backend as K, nnet as N, visual as V, fuel as F
from odin.utils import minibatch, Progbar, get_exppath, crypto
from odin import ml

from sklearn.svm import SVC
from sklearn.metrics import classification_report

# folder that holds everything produced by this experiment
EXP_PATH = get_exppath('cifar10_ivec')
# ===========================================================================
# Load the dataset
# ===========================================================================
ds = F.CIFAR10.load()
print(ds)
# ====== flatten and normalize the data ====== #
# every sample becomes one row of 3 * 32 * 32 values, divided by 255
X_train = ds['X_train'][:].reshape(-1, 3 * 32 * 32) / 255.
y_train = ds['y_train'][:]
X_test = ds['X_test'][:].reshape(-1, 3 * 32 * 32) / 255.
y_test = ds['y_test'][:]
print("Input:", X_train.shape, X_test.shape)
# ===========================================================================
# Training the GMM
# ===========================================================================
ivec = ml.Ivector(path=EXP_PATH, nmix=32, tv_dim=16,
Пример #7
0
    CURRENT_STATE = SystemStates.TRAINING
elif _script_name in ('make_score'):
    CURRENT_STATE = SystemStates.SCORING
    _check_feature_extraction_requirement()
else:
    raise RuntimeError("Unknown states for current running script: %s/%s" %
                       (get_script_path(), get_script_name()))
# ====== announce the detected state ====== #
print(ctext('====================================', 'red'))
print(ctext("System state:", 'cyan'), ctext(CURRENT_STATE, 'yellow'))
print(ctext('====================================', 'red'))
# ===========================================================================
# FILE LIST PATH
# ===========================================================================
# ====== basic directories ====== #
EXP_DIR = get_exppath('sre', override=False)
# extracted vectors, used for training the backend and extracting the scores
VECTORS_DIR = os.path.join(EXP_DIR, 'vectors')
# the results
RESULT_DIR = os.path.join(EXP_DIR, 'results')
# the analysis
ANALYSIS_DIR = os.path.join(EXP_DIR, 'analysis')
# create whichever of the three sub-folders is still missing
for _subdir in (VECTORS_DIR, RESULT_DIR, ANALYSIS_DIR):
    if not os.path.exists(_subdir):
        os.mkdir(_subdir)
del _subdir  # keep the loop variable out of the module namespace
# ====== raw data ====== #
PATH_BASE = select_path('/media/data2/SRE_DATA',
                        '/mnt/sda1/SRE_DATA',
Пример #8
0
from odin import fuel as F, visual as V
from odin.utils import ctext, Progbar, get_exppath, select_path
from odin.stats import train_valid_test_split, sampling_iter
from odin.preprocessing.signal import segment_axis

# home directory of the current user; the last two candidates below are
# built from it
HOME_PATH = os.path.expanduser('~')
# path to the 'voxceleb1_wav' folder, picked by `select_path` among the
# candidate locations listed here
# NOTE(review): presumably the first existing candidate wins and nothing is
# created because of create_new=False - confirm in odin.utils.select_path
PATH_TO_WAV = select_path(
    '/media/data2/SRE_DATA/voxceleb',
    '/mnt/sdb1/SRE_DATA/voxceleb',
    os.path.join(HOME_PATH, 'data', 'voxceleb'),
    os.path.join(HOME_PATH, 'voxceleb'),
    create_new=False
)
# path to the folder that contains the experiment results
PATH_EXP = get_exppath('voxceleb')
# output directory for the acoustic features (created here if missing)
PATH_ACOUSTIC_FEAT = os.path.join(PATH_EXP, 'voxceleb_feat')
if not os.path.exists(PATH_ACOUSTIC_FEAT):
  os.mkdir(PATH_ACOUSTIC_FEAT)
# ====== remove '_quarter' if you want the full training data ====== #
# FILE_LIST is the key used to index `ds` further down
FILE_LIST = "voxceleb_files_quarter"
TRAIN_LIST = "voxceleb_sys_train_with_labels_quarter"
TRIAL_LIST = "voxceleb_trials"
# ====== Load the file list ====== #
ds = F.load_voxceleb_list()
WAV_FILES = {} # dictionary mapping 'file_path' -> 'file_name'
for path, channel, name in ds[FILE_LIST]:
  path = os.path.join(PATH_TO_WAV, path)
  # validate that every listed file exists
  assert os.path.exists(path), path
Пример #9
0
from odin.stats import train_valid_test_split, sampling_iter

# Supported label types mapped to integer ids. The id of a label is its
# position in the tuple below, so the order of the names must not change.
_support_label = {
    label: index
    for index, label in enumerate((
        'other',
        'gender',
        'age',
        'dialect',
        'speaker',
        'production',
        'digit',
    ))
}

# ===========================================================================
# Const for path and features configuration
# ===========================================================================
# root folder of the TIDIGITS experiment, resolved by `get_exppath`
# NOTE(review): override=False presumably keeps an existing folder untouched -
# confirm in odin.utils.get_exppath
PATH_EXP = get_exppath(tag='TIDIGITS', override=False)
# ====== acoustic feature extraction ====== #
# location of the extracted features inside PATH_EXP; this statement only
# builds the path string, nothing is created on disk here
PATH_ACOUSTIC = os.path.join(PATH_EXP, 'TIDIGITS_feat')

class FeatureConfigs(object):
  """Constants grouped as plain class attributes.

  NOTE(review): these look like the settings of the acoustic feature
  extraction; the meanings noted beside the attributes are inferred from
  their names only - confirm them against the code that reads this class.
  """
  padding = False
  sr = 8000  # presumably the sample rate in Hz
  window = 'hamm'  # presumably the name of the analysis window
  n_fft = 512
  n_mels = 40
  n_ceps = 40
  fmin = 100  # fmin/fmax: presumably the frequency range in Hz
  fmax = 4000
  frame_length = 0.025  # presumably seconds (i.e. 25 ms)
  step_length = 0.010  # presumably seconds (i.e. 10 ms)
  dtype = 'float16'  # presumably the dtype the features are stored in
Пример #10
0
  CURRENT_STATE = SystemStates.TRAINING
elif _script_name in ('make_score'):
  CURRENT_STATE = SystemStates.SCORING
  _check_feature_extraction_requirement()
else:
  raise RuntimeError("Unknown states for current running script: %s/%s" %
    (get_script_path(), get_script_name()))
# ====== announce the detected state ====== #
print(ctext('====================================', 'red'))
print(ctext("System state:", 'cyan'), ctext(CURRENT_STATE, 'yellow'))
print(ctext('====================================', 'red'))
# ===========================================================================
# FILE LIST PATH
# ===========================================================================
# ====== basic directories ====== #
EXP_DIR = get_exppath('sre', override=False)
# extracted vectors, used for training the backend and extracting the scores
VECTORS_DIR = os.path.join(EXP_DIR, 'vectors')
# the results
RESULT_DIR = os.path.join(EXP_DIR, 'results')
# the analysis
ANALYSIS_DIR = os.path.join(EXP_DIR, 'analysis')
# create whichever of the three sub-folders is still missing
for _subdir in (VECTORS_DIR, RESULT_DIR, ANALYSIS_DIR):
  if not os.path.exists(_subdir):
    os.mkdir(_subdir)
del _subdir  # keep the loop variable out of the module namespace
# ====== raw data ====== #
PATH_BASE = select_path(
    '/media/data2/SRE_DATA',
Пример #11
0
from __future__ import print_function, division, absolute_import
import os
os.environ['ODIN'] = 'gpu,float32'
import shutil

import numpy as np
import tensorflow as tf

from odin import backend as K, nnet as N, visual as V, fuel as F
from odin.utils import batching, Progbar, get_exppath, crypto
from odin import ml

from sklearn.svm import SVC
from sklearn.metrics import classification_report

# folder that holds everything produced by this experiment
EXP_PATH = get_exppath('cifar10_ivec')
# ===========================================================================
# Load the dataset
# ===========================================================================
ds = F.CIFAR10.load()
print(ds)
# ====== flatten and normalize the data ====== #
# every sample becomes one row of 3 * 32 * 32 values, divided by 255
X_train = ds['X_train'][:].reshape(-1, 3 * 32 * 32) / 255.
y_train = ds['y_train'][:]
X_test = ds['X_test'][:].reshape(-1, 3 * 32 * 32) / 255.
y_test = ds['y_test'][:]
print("Input:", X_train.shape, X_test.shape)
# ===========================================================================
# Training the GMM
# ===========================================================================
ivec = ml.Ivector(path=EXP_PATH, nmix=32, tv_dim=16,
Пример #12
0
).add('--gmm', "Force re-run training GMM", False
).add('--stat', "Force re-extraction of centered statistics", False
).add('--tmat', "Force re-run training Tmatrix", False
).add('--ivec', "Force re-run extraction of i-vector", False
).add('--all', "Run all the system again, just a shortcut", False
).add('--acous', "Force re-run acoustic feature extraction", False
).parse()
# Propagate the "force re-run" flags along the chain gmm -> stat -> tmat ->
# ivec: setting one flag also sets every flag after it, and `all` sets all
# four of them (`acous` is not modified here).
args.gmm |= args.all
args.stat |= args.all | args.gmm
args.tmat |= args.all | args.stat
args.ivec |= args.all | args.tmat
FEAT = args.feat
# ===========================================================================
# Const
# ===========================================================================
# root folder of the FSDD experiment and the features location inside it
EXP_DIR = get_exppath('FSDD')
PATH_ACOUSTIC_FEATURES = os.path.join(EXP_DIR, 'features')
# ====== GMM training ====== #
NMIX = args.nmix
GMM_NITER = 12
GMM_DOWNSAMPLE = 1
GMM_STOCHASTIC = True
GMM_DTYPE = 'float64'
# ====== IVEC training ====== #
TV_DIM = args.tdim
TV_NITER = 16
TV_DTYPE = 'float64'
# ===========================================================================
# Extract acoustic features
# ===========================================================================
# path to preprocessed dataset