Exemplo n.º 1
0
def get_model_path(system_name, logging=True):
    """Build the experiment directory for a system and return its paths.

    The directory name is assembled from the lower-cased system name, the
    feature recipe (underscores removed), the feature name, the values of
    the relevant parsed arguments in `_args` and - when `_args.exclude` is
    not empty - the sorted, de-duplicated names of the excluded datasets.

    Note: if the directory already exists and `IS_OVERRIDE` is set, it is
    removed and created again.

    Parameters
    ----------
    system_name : string
        'xvec' or 'ivec' (compared case-sensitively), decides which parsed
        arguments are taken into account for creating the model name
    logging : bool
        if True, print the model path and the log path

    Return
    ------
    exp_dir, model_path, log_path

    Raises
    ------
    ValueError
        if `system_name` is not supported
    AssertionError
        if an excluded dataset is neither in `sre_file_list` nor 'noise'
    """
    if system_name == 'xvec':
        args_name = ['utt', 'seq']
    elif system_name == 'ivec':
        args_name = ['nmix', 'tdim']
    else:
        raise ValueError("No support for system with name: %s" % system_name)
    args_name += ['mindur', 'minutt']
    # ====== base system and feature identity ====== #
    name = str(system_name).lower()
    name += '_' + FEATURE_RECIPE.replace('_', '')
    name += '.' + FEATURE_NAME
    # ====== concat the attributes ====== #
    name += '.' + '_'.join(str(getattr(_args, attr)) for attr in args_name)
    # ====== check the exclude dataset ====== #
    excluded_dataset = str(_args.exclude).strip()
    if excluded_dataset:
        # de-duplicate and sort so the same set always gives the same name
        dataset_names = sorted(set(excluded_dataset.split(',')))
        for excluded in dataset_names:
            assert excluded in sre_file_list or excluded == 'noise', \
                "Unknown excluded dataset with name: '%s'" % excluded
        name += '.' + '_'.join(dataset_names)
    # ====== check save_path ====== #
    save_path = os.path.join(EXP_DIR, name)
    if os.path.exists(save_path) and IS_OVERRIDE:
        print("Override path:", ctext(save_path, 'yellow'))
        shutil.rmtree(save_path)
    if not os.path.exists(save_path):
        # makedirs (not mkdir) so a missing EXP_DIR is created as well
        os.makedirs(save_path)
    # ====== return path ====== #
    log_path = get_logpath(name='log.txt',
                           increasing=True,
                           odin_base=False,
                           root=save_path)
    model_path = os.path.join(save_path, 'model.ai')
    if logging:
        print("Model path:", ctext(model_path, 'cyan'))
        print("Log path:", ctext(log_path, 'cyan'))
    return save_path, model_path, log_path
Exemplo n.º 2
0
def get_model_path(system_name, logging=True):
  """Create the experiment directory of a system and return its paths.

  The directory name has the form
  `<system>_<recipe>.<feature>.<attributes>[.<excluded datasets>]`:
  the lower-cased system name, `FEATURE_RECIPE` without underscores,
  `FEATURE_NAME`, the '_'-joined values of the selected attributes of
  `_args`, and (only if `_args.exclude` is not empty) the sorted unique
  names of the excluded datasets.

  Note: an existing directory is deleted first when `IS_OVERRIDE` is set.

  Parameters
  ----------
  system_name : string
    'xvec' or 'ivec' (case-sensitive comparison), selects the attributes
    of `_args` used for creating the model name
  logging : bool
    if True, print the model path and the log path

  Return
  ------
  exp_dir, model_path, log_path

  Raises
  ------
  ValueError
    if `system_name` is not supported
  AssertionError
    if an excluded dataset is neither in `sre_file_list` nor 'noise'
  """
  if system_name == 'xvec':
    args_name = ['utt', 'seq']
  elif system_name == 'ivec':
    args_name = ['nmix', 'tdim']
  else:
    raise ValueError("No support for system with name: %s" % system_name)
  args_name += ['mindur', 'minutt']
  # ====== base system and feature identity ====== #
  name = str(system_name).lower()
  name += '_' + FEATURE_RECIPE.replace('_', '')
  name += '.' + FEATURE_NAME
  # ====== concat the attributes ====== #
  name += '.' + '_'.join(str(getattr(_args, attr)) for attr in args_name)
  # ====== check the exclude dataset ====== #
  excluded_dataset = str(_args.exclude).strip()
  if excluded_dataset:
    # unique + sorted: the order given by the user does not change the name
    dataset_names = sorted(set(excluded_dataset.split(',')))
    for excluded in dataset_names:
      assert excluded in sre_file_list or excluded == 'noise', \
        "Unknown excluded dataset with name: '%s'" % excluded
    name += '.' + '_'.join(dataset_names)
  # ====== check save_path ====== #
  save_path = os.path.join(EXP_DIR, name)
  if os.path.exists(save_path) and IS_OVERRIDE:
    print("Override path:", ctext(save_path, 'yellow'))
    shutil.rmtree(save_path)
  if not os.path.exists(save_path):
    # makedirs (not mkdir) so a missing EXP_DIR is created as well
    os.makedirs(save_path)
  # ====== return path ====== #
  log_path = get_logpath(name='log.txt', increasing=True,
                         odin_base=False, root=save_path)
  model_path = os.path.join(save_path, 'model.ai')
  if logging:
    print("Model path:", ctext(model_path, 'cyan'))
    print("Log path:", ctext(log_path, 'cyan'))
  return save_path, model_path, log_path
Exemplo n.º 3
0
        # pp.validate_features(processor,
        #                      nb_samples=12,
        #                      path=os.path.join(EXP_DIR, 'feature_validation'),
        #                      override=True)
# Open the acoustic feature dataset in read-only mode.
ds = F.Dataset(PATH_ACOUSTIC_FEATURES, read_only=True)
print(ds)
# Index table chosen by `args.feat`; each item is (name, (start, end)).
indices = list(ds['indices_%s' % args.feat].items())
print("Utterances length:")
# Statistics of the per-utterance length, computed as end - start.
print("   ",
      describe([end - start for name, (start, end) in indices], shorten=True))
# ===========================================================================
# Basic path for GMM, T-matrix and I-vector
# ===========================================================================
# EXP_DIR is rebound to a sub-directory named "<FEAT>_<NMIX>_<TV_DIM>", so
# every path below belongs to this particular configuration.
EXP_DIR = os.path.join(EXP_DIR, '%s_%d_%d' % (FEAT, NMIX, TV_DIM))
LOG_PATH = get_logpath(name='log.txt',
                       override=False,
                       root=EXP_DIR,
                       odin_base=False)
# NOTE(review): stdio presumably redirects console output to LOG_PATH -
# confirm against odin.utils.stdio
stdio(LOG_PATH)
print("Exp-dir:", ctext(EXP_DIR, 'cyan'))
print("Log path:", ctext(LOG_PATH, 'cyan'))
# ====== ivec path ====== #
GMM_PATH = os.path.join(EXP_DIR, 'gmm')
TMAT_PATH = os.path.join(EXP_DIR, 'tmat')
# The tuples below are (train path, test path) pairs.
# zero order statistics
Z_PATH = (os.path.join(EXP_DIR, 'Z_train'), os.path.join(EXP_DIR, 'Z_test'))
# first order statistics
F_PATH = (os.path.join(EXP_DIR, 'F_train'), os.path.join(EXP_DIR, 'F_test'))
# i-vector path
I_PATH = (os.path.join(EXP_DIR, 'I_train'), os.path.join(EXP_DIR, 'I_test'))
# labels
L_PATH = (  # labels
Exemplo n.º 4
0
from __future__ import print_function, division, absolute_import

import os
import cPickle
import requests
import webbrowser
from twython import Twython

from odin import utils

# ===========================================================================
# Constants
# ===========================================================================
# Credentials are read from the environment; a missing variable raises
# KeyError at import time.
CONSUMER_KEY = os.environ['TWITTER_KEY']
CONSUMER_SECRET = os.environ['TWITTER_SECRET']
SAVE_PATH = utils.get_logpath('twitter.log', override=False)

# ===========================================================================
# First authentication
# ===========================================================================
# Only runs when nothing exists at SAVE_PATH yet.
if not os.path.exists(SAVE_PATH):
    twitter = Twython(app_key=CONSUMER_KEY, app_secret=CONSUMER_SECRET)
    # NOTE(review): `user_timeline` is not used in the visible part of this
    # script - confirm whether this request is needed
    user_timeline = twitter.get_user_timeline(screen_name="NgoTrongTrung")
    auth = twitter.get_authentication_tokens()
    OAUTH_TOKEN = auth['oauth_token']
    OAUTH_TOKEN_SECRET = auth['oauth_token_secret']
    # ====== Getting the PIN using verifier URL ====== #
    # The browser is opened before the prompt is shown.
    webbrowser.open(auth['auth_url'])
    # NOTE: raw_input only exists on Python 2.
    PIN = raw_input('We will open browser, copy the PIN code here:')
    # Only the length is validated; the input is not checked to be numeric.
    if len(PIN) != 7:
        raise ValueError('PIN must be 7 numbers.')
Exemplo n.º 5
0
#!/usr/bin/env python
from __future__ import print_function, division, absolute_import

import numpy as np

from odin.utils import get_modelpath, ArgController, stdio, get_logpath

stdio(get_logpath('tmp.log', override=True))

# Command-line options as (flag, help text, default value), one per line.
arg = (ArgController(version=0.12)
       .add('-backend', 'theano or tensorflow', 'tensorflow')
       .add('-ds', 'dataset cifar10, or mnist', 'mnist')
       .add('-epoch', 'number of epoch', 3)
       .add('-lr', 'learning rate', 0.01)
       .parse())

import os
os.environ['ODIN'] = 'float32,gpu,%s,seed=12' % arg['backend']

from odin import backend as K
from odin import nnet as N
from odin import fuel, training
from six.moves import cPickle

# ===========================================================================
# Load data
# ===========================================================================
# True when the requested dataset name contains 'mnist' (case-insensitive).
USE_MNIST_DATA = 'mnist' in arg['ds'].lower()

if USE_MNIST_DATA:
    ds = fuel.load_mnist()
Exemplo n.º 6
0
 def config_path(self):
   """Path of the config file of this object inside the log directory.

   The file name is the class name followed by `self._id` and the
   extension '.cfg'.
   """
   log_dir = get_logpath()
   filename = '%s%s.cfg' % (self.__class__.__name__, self._id)
   return os.path.join(log_dir, filename)
Exemplo n.º 7
0
 def _transform(self, X):
   """Run `SMILExtract` on one input and post-process its CSV output.

   Parameters
   ----------
   X : Mapping, string or numpy.ndarray
     * Mapping: may contain 'path' (audio file), 'sr' (sample rate) and
       'raw' (audio array)
     * string: path to the audio file
     * numpy.ndarray: raw audio, written to a temporary wav file

   Return
   ------
   the dictionary returned by `self._post_processing`

   Raises
   ------
   ValueError
     unsupported input type, sample rate different from `self.sr`, or
     `_post_processing` did not return a dictionary
   RuntimeError
     the sample rate is unknown, or there is neither an existing audio
     file nor a raw array to process
   """
   # ====== find input file ====== #
   raw = None
   path = None
   if isinstance(X, Mapping):
     if 'path' in X:
       path = X['path']
     if 'sr' in X:
       if self.sr is None:
         # adopt the sample rate of this input and generate the config
         self.sr = X['sr']
         self._update_config()
         self._first_config_generated = True
       elif self.sr != X['sr']:
         raise ValueError("Given sample rate: %d, but the audio file has "
                          "sample rate: %d" % (self.sr, X['sr']))
     if 'raw' in X:
       raw = X['raw']
   elif is_string(X):
     path = X
   elif isinstance(X, np.ndarray):
     raw = X
   else:
     raise ValueError("openSMILE extractor require path to audio file.")
   # no sample rate specified, cannot generate appropriate config
   if self.sr is None:
     raise RuntimeError("Cannot acquire sample rate for the input.")
   # ====== first time generate config ====== #
   if not self._first_config_generated:
     self._first_config_generated = True
     self._update_config()
   # ====== run openSMILE ====== #
   # integer bound: random.randint rejects float arguments (the former
   # 10e8) on Python >= 3.12
   unique_id = os.getpid() + random.randint(0, 10 ** 9)
   # temporary input/output files share the same unique prefix
   prefix = os.path.join(
       get_logpath(), '%s%d' % (self.__class__.__name__, unique_id))
   inpath = prefix + '.wav'
   outpath = prefix + '.csv'
   try:
     if path is None or not os.path.exists(path):
       if raw is None:
         raise RuntimeError("openSMILE require input audio file, since "
             "we cannot find any audio file, it is required to provide "
             "raw array and sample rate, so the audio file will be cached.")
       from soundfile import write
       write(inpath, data=raw, samplerate=self.sr)
       path = inpath
     # argument list without a shell: paths containing spaces or shell
     # metacharacters reach SMILExtract unchanged
     import subprocess
     command = ['SMILExtract',
                '-loglevel', '%d' % self._log_level,
                '-C', '%s' % self.config_path,
                '-I', '%s' % path,
                '-O', outpath]
     subprocess.call(command)
     results = np.genfromtxt(outpath, dtype='float32',
                             delimiter=',', skip_header=0)
   except Exception:
     import traceback
     traceback.print_exc()
     # bare raise keeps the original traceback
     raise
   finally:
     # always remove the temporary files, also on failure
     if os.path.exists(inpath):
       os.remove(inpath)
     if os.path.exists(outpath):
       os.remove(outpath)
   # ====== post-processing ====== #
   X_update = self._post_processing(results)
   if not isinstance(X_update, dict):
     raise ValueError("_post_processing must return a dictionary.")
   return X_update
Exemplo n.º 8
0
# => Gaussian normalized is better, and float16 is no different from float32
# ===========================================================================
from __future__ import print_function, division, absolute_import

import numpy as np

import os
os.environ['ODIN'] = 'float32,gpu,theano,seed=12,cnmem=0.4'

from odin import backend as K
from odin import nnet as N
from odin import fuel, training
from odin.utils import get_modelpath, ArgController, stdio, get_logpath
from six.moves import cPickle

# NOTE(review): stdio presumably redirects console output to the log file -
# confirm against odin.utils.stdio
stdio(get_logpath('tmp.log'))

# ===========================================================================
# Load data
# ===========================================================================
ds = fuel.load_cifar10()
print(ds)

# Both input placeholders take their trailing dimensions from ds['X_train'];
# the leading dimension is left unspecified (None).
X_train = K.placeholder(shape=(None,) + ds['X_train'].shape[1:], name='X_train')
X_score = K.placeholder(shape=(None,) + ds['X_train'].shape[1:], name='X_score')
# One int32 value per sample.
y = K.placeholder(shape=(None,), name='y', dtype='int32')

# ===========================================================================
# Build network
# ===========================================================================
ops = N.Sequence([
Exemplo n.º 9
0
from __future__ import print_function, division, absolute_import

import os
import requests
import webbrowser
from twython import Twython

from odin import utils

# Credentials are read from the environment; a missing variable raises
# KeyError.
CONSUMER_KEY = os.environ['TWITTER_KEY']
CONSUMER_SECRET = os.environ['TWITTER_SECRET']
# NOTE(review): SAVE_PATH is not used in the visible part of this script.
SAVE_PATH = utils.get_logpath('twitter')

# print(auth)
# Step 1: OAuth 2 client, used only to obtain an access token.
twitter = Twython(app_key=CONSUMER_KEY,
                  app_secret=CONSUMER_SECRET,
                  oauth_version=2)
ACCESS_TOKEN = twitter.obtain_access_token()

# Step 2: new client authenticated with that token, then run one search.
twitter = Twython(CONSUMER_KEY, access_token=ACCESS_TOKEN)
search_results = twitter.search(q='WebsDotCom', count=50)
print(search_results)
Exemplo n.º 10
0
from odin import preprocessing as pp
from odin import fuel as F, nnet as N, backend as K
from odin.utils import (get_module_from_path, get_script_path, ctext, Progbar,
                        stdio, get_logpath, get_formatted_datetime)
from odin.stats import describe

from helpers import (SCORING_DATASETS, BACKEND_DATASETS, SCORE_SYSTEM_NAME,
                     SCORE_SYSTEM_ID, N_PLDA, N_LDA, PLDA_MAXIMUM_LIKELIHOOD,
                     PLDA_SHOW_LLK, PATH_ACOUSTIC_FEATURES, FEATURE_RECIPE,
                     FEATURE_NAME, get_model_path, NCPU, get_logpath,
                     prepare_dnn_feeder_recipe, sre_file_list, Config, EXP_DIR,
                     VECTORS_DIR, RESULT_DIR, filter_utterances)
# ====== scoring log ====== #
# NOTE: `get_logpath` is imported from both odin.utils and helpers above;
# the later import (helpers) is the one bound here.
stdio(
    get_logpath(name='make_score.log',
                increasing=True,
                odin_base=False,
                root=EXP_DIR))
# Header describing the scoring configuration of this run.
print('=' * 48)
print(get_formatted_datetime(only_number=False))
print("System name    :", SCORE_SYSTEM_NAME)
print("System id      :", SCORE_SYSTEM_ID)
print("Feature recipe :", FEATURE_RECIPE)
print("Feature name   :", FEATURE_NAME)
# The dataset names are the keys of the two dataset mappings.
print("Backend dataset:", ','.join(BACKEND_DATASETS.keys()))
print("Scoring dataset:", ','.join(SCORING_DATASETS.keys()))
print('=' * 48)


# ===========================================================================
# Some helper
# ===========================================================================
Exemplo n.º 11
0
# ===========================================================================
from __future__ import print_function, division, absolute_import

import numpy as np

import os

os.environ['ODIN'] = 'float32,gpu,theano,seed=12,cnmem=0.4'

from odin import backend as K
from odin import nnet as N
from odin import fuel, training
from odin.utils import get_modelpath, ArgController, stdio, get_logpath
from six.moves import cPickle

# NOTE(review): stdio presumably redirects console output to the log file -
# confirm against odin.utils.stdio
stdio(get_logpath('tmp.log'))

# ===========================================================================
# Load data
# ===========================================================================
ds = fuel.load_cifar10()
print(ds)

# Both input placeholders take their trailing dimensions from ds['X_train'];
# the leading dimension is left unspecified (None).
X_train = K.placeholder(shape=(None, ) + ds['X_train'].shape[1:],
                        name='X_train')
X_score = K.placeholder(shape=(None, ) + ds['X_train'].shape[1:],
                        name='X_score')
# One int32 value per sample.
y = K.placeholder(shape=(None, ), name='y', dtype='int32')

# ===========================================================================
# Build network
Exemplo n.º 12
0
from sklearn.metrics import accuracy_score, log_loss, f1_score

from odin import fuel as F
from odin import nnet as N, backend as K
from odin import visual as V
from odin.utils import (ctext, mpi, Progbar, catch_warnings_ignore, stdio,
                        get_logpath, catch_warnings_ignore)


from helpers import (FEATURE_RECIPE, FEATURE_NAME, PATH_ACOUSTIC_FEATURES,
                     MINIMUM_UTT_DURATION, ANALYSIS_DIR, Config,
                     filter_utterances, prepare_dnn_data)

# ====== prepare log ====== #
# Log file of this analysis, created under ANALYSIS_DIR.
stdio(get_logpath(name="analyze_data.log", increasing=True,
                  odin_base=False, root=ANALYSIS_DIR))
print(ctext(FEATURE_RECIPE, 'lightyellow'))
print(ctext(FEATURE_NAME, 'lightyellow'))
# Stop early if the feature directory of this recipe does not exist.
# NOTE: an assert is skipped when Python runs with -O.
assert os.path.isdir(os.path.join(PATH_ACOUSTIC_FEATURES, FEATURE_RECIPE))
# ====== essential path ====== #
# Figure file: "<recipe without underscores>_<feature name>.pdf"
figure_path = os.path.join(ANALYSIS_DIR, '%s_%s.pdf' %
                           (FEATURE_RECIPE.replace('_', ''), FEATURE_NAME))
print(ctext(figure_path, 'lightyellow'))
# ===========================================================================
# Load the data
# ===========================================================================
# Dataset of the selected recipe, opened in read-only mode.
ds = F.Dataset(os.path.join(PATH_ACOUSTIC_FEATURES, FEATURE_RECIPE),
               read_only=True)
X = ds[FEATURE_NAME]
# remove all noise data
indices = {name: (start, end)
Exemplo n.º 13
0
# ====== import ====== #
import os

# Configure odin through the ODIN environment variable; it is set before any
# odin module is imported below (presumably read at import time - confirm).
os.environ['ODIN'] = 'float32,%s,%s' % (args['dev'], args['bk'])

import numpy as np

# Fixed seed for numpy's global random generator.
np.random.seed(1208)

from odin import nnet as N, backend as K, fuel as F, stats
from odin.utils import get_modelpath, stdio, get_logpath, get_datasetpath
from odin.basic import has_roles, BIAS, WEIGHT
from odin import training

# set log path
stdio(path=get_logpath('digit_audio.log', override=True))

# ===========================================================================
# Get wav and process new dataset configuration
# ===========================================================================
# ====== process new features ====== #
if False:
    datapath = F.load_digit_wav()
    output_path = get_datasetpath(name='digit', override=True)
    feat = F.SpeechProcessor(datapath,
                             output_path,
                             audio_ext='wav',
                             sr_new=8000,
                             win=0.025,
                             shift=0.01,
                             nb_melfilters=40,
Exemplo n.º 14
0
 def cache_path(self):
     """Return the log path named after the class of this object.

     The path is requested from `utils.get_logpath` with `override=False`.
     """
     class_name = self.__class__.__name__
     return utils.get_logpath(class_name, override=False)
Exemplo n.º 15
0
args = ArgController(
).add('-model', 'model name, specified in models_cifar.py', 'cnn'
).parse()

import os
os.environ['ODIN'] = 'float32,gpu,seed=87654321,log'

import numpy as np
import tensorflow as tf

from odin import fuel as F, nnet as N, backend as K, training, utils
from odin.stats import train_valid_test_split

# Model and log locations are both derived from the `-model` argument.
MODEL_NAME = args.model
MODEL_PATH = utils.get_modelpath(name='cifar10_%s' % MODEL_NAME, override=True)
LOG_PATH = utils.get_logpath(name='cifar10_%s.log' % MODEL_NAME, override=True)
# NOTE(review): `stdio` (and `one_hot` below) are not imported in the visible
# part of this script, only `utils` is - confirm they are in scope.
stdio(LOG_PATH)
# ===========================================================================
# Some handmade constants
# ===========================================================================
NB_EPOCH = 10
LEARNING_RATE = 0.001
# ===========================================================================
# Load dataset
# ===========================================================================
ds = F.CIFAR10.get_dataset()
nb_labels = 10
print(ds)
# Features are converted to float32 and divided by 255
# (presumably 8-bit pixel values scaled to [0, 1] - confirm).
X_train = ds['X_train'][:].astype('float32') / 255.
# Labels are one-hot encoded with `nb_labels` classes.
y_train = one_hot(ds['y_train'][:], nb_classes=nb_labels)
X_test = ds['X_test'][:].astype('float32') / 255.
Exemplo n.º 16
0
from odin import training
from odin import preprocessing as pp
from odin.ml import evaluate, fast_tsne
from odin.visual import (print_dist, print_confusion, print_hist, plot_scatter,
                         plot_figure, plot_spectrogram, plot_save,
                         plot_confusion_matrix, generate_random_colors,
                         generate_random_marker)
from odin.utils import (get_logpath, get_modelpath, get_datasetpath,
                        get_figpath, Progbar, unique_labels, chain,
                        as_tuple_of_shape, stdio, ctext, ArgController)
# ===========================================================================
# Const
# ===========================================================================
# Names of the features used by this script.
FEAT = ['mspec', 'sad']
# Model, log and figure paths are all requested with override=True.
MODEL_PATH = get_modelpath(name='DIGITS', override=True)
LOG_PATH = get_logpath(name='digits.log', override=True)
FIG_PATH = get_figpath(name='DIGITS', override=True)
stdio(LOG_PATH)

DEBUG = False
# ====== training ====== #
BATCH_SIZE = 32
NB_EPOCH = 20
NB_SAMPLES = 8
VALID_PERCENTAGE = 0.4
# ===========================================================================
# Load dataset
# ===========================================================================
# The preprocessed features must already exist (override=False); the
# assertion message names the script that creates them.
# NOTE: an assert is skipped when Python runs with -O.
path = get_datasetpath(name='TIDIGITS_feats', override=False)
assert os.path.isdir(path), \
    "Cannot find preprocessed feature at: %s, try to run 'odin/examples/features.py'" % path
Exemplo n.º 17
0
from sklearn.metrics import accuracy_score, log_loss, f1_score

from odin import fuel as F
from odin import nnet as N, backend as K
from odin import visual as V
from odin.utils import (ctext, mpi, Progbar, catch_warnings_ignore, stdio,
                        get_logpath, catch_warnings_ignore)

from helpers import (FEATURE_RECIPE, FEATURE_NAME, PATH_ACOUSTIC_FEATURES,
                     MINIMUM_UTT_DURATION, ANALYSIS_DIR, Config,
                     filter_utterances, prepare_dnn_data)

# ====== prepare log ====== #
# Log file of this analysis, created under ANALYSIS_DIR.
stdio(
    get_logpath(name="analyze_data.log",
                increasing=True,
                odin_base=False,
                root=ANALYSIS_DIR))
print(ctext(FEATURE_RECIPE, 'lightyellow'))
print(ctext(FEATURE_NAME, 'lightyellow'))
# Stop early if the feature directory of this recipe does not exist.
# NOTE: an assert is skipped when Python runs with -O.
assert os.path.isdir(os.path.join(PATH_ACOUSTIC_FEATURES, FEATURE_RECIPE))
# ====== essential path ====== #
# Figure file: "<recipe without underscores>_<feature name>.pdf"
figure_path = os.path.join(
    ANALYSIS_DIR,
    '%s_%s.pdf' % (FEATURE_RECIPE.replace('_', ''), FEATURE_NAME))
print(ctext(figure_path, 'lightyellow'))
# ===========================================================================
# Load the data
# ===========================================================================
# Dataset of the selected recipe, opened in read-only mode.
ds = F.Dataset(os.path.join(PATH_ACOUSTIC_FEATURES, FEATURE_RECIPE),
               read_only=True)
X = ds[FEATURE_NAME]
Exemplo n.º 18
0
        stop_on_failure=True)
    processor.run()
    # pp.validate_features(processor,
    #                      nb_samples=12,
    #                      path=os.path.join(EXP_DIR, 'feature_validation'),
    #                      override=True)
# Open the acoustic feature dataset in read-only mode.
ds = F.Dataset(PATH_ACOUSTIC_FEATURES, read_only=True)
print(ds)
# Index table chosen by `args.feat`; each item is (name, (start, end)).
indices = list(ds['indices_%s' % args.feat].items())
print("Utterances length:")
# Statistics of the per-utterance length, computed as end - start.
print("   ", describe([end - start for name, (start, end) in indices], shorten=True))
# ===========================================================================
# Basic path for GMM, T-matrix and I-vector
# ===========================================================================
# EXP_DIR is rebound to a sub-directory named "<FEAT>_<NMIX>_<TV_DIM>", so
# every path below belongs to this particular configuration.
EXP_DIR = os.path.join(EXP_DIR, '%s_%d_%d' % (FEAT, NMIX, TV_DIM))
LOG_PATH = get_logpath(name='log.txt', override=False, root=EXP_DIR, odin_base=False)
# NOTE(review): stdio presumably redirects console output to LOG_PATH -
# confirm against odin.utils.stdio
stdio(LOG_PATH)
print("Exp-dir:", ctext(EXP_DIR, 'cyan'))
print("Log path:", ctext(LOG_PATH, 'cyan'))
# ====== ivec path ====== #
GMM_PATH = os.path.join(EXP_DIR, 'gmm')
TMAT_PATH = os.path.join(EXP_DIR, 'tmat')
# The tuples below are (train path, test path) pairs.
# zero order statistics
Z_PATH = (
    os.path.join(EXP_DIR, 'Z_train'),
    os.path.join(EXP_DIR, 'Z_test'))
# first order statistics
F_PATH = (
    os.path.join(EXP_DIR, 'F_train'),
    os.path.join(EXP_DIR, 'F_test'))
# i-vector path