def get_model_path(system_name, logging=True):
    """Build the experiment directory for a system and return its paths.

    The directory name is assembled from the system name, the feature
    recipe and feature name, the values of a system-dependent set of
    parsed arguments (read from `_args`), and the sorted names of the
    excluded datasets.  The directory is created under `EXP_DIR` if it
    does not exist; an existing one is removed first when `IS_OVERRIDE`
    is set.

    Parameters
    ----------
    system_name : string
        'xvec' or 'ivec', selects which parsed arguments are taken into
        account for creating the model name
    logging : bool
        if True, print the model path and the log path

    Return
    ------
    exp_dir, model_path, log_path

    Raises
    ------
    ValueError
        if `system_name` is neither 'xvec' nor 'ivec'
    """
    # arguments contributing to the name depend on the system type
    if system_name == 'xvec':
        args_name = ['utt', 'seq']
    elif system_name == 'ivec':
        args_name = ['nmix', 'tdim']
    else:
        raise ValueError("No support for system with name: %s" % system_name)
    args_name += ['mindur', 'minutt']
    # ====== base system and feature identity ====== #
    name = str(system_name).lower()
    name += '_' + FEATURE_RECIPE.replace('_', '')
    name += '.' + FEATURE_NAME
    # ====== concat the attributes ====== #
    name += '.' + '_'.join(str(getattr(_args, attr)) for attr in args_name)
    # ====== check the exclude dataset ====== #
    excluded_dataset = str(_args.exclude).strip()
    if len(excluded_dataset) > 0:
        # tolerate whitespace around the names and empty entries,
        # e.g. "a, b" or "a," (previously rejected as unknown datasets)
        dataset_str = sorted(set(
            token.strip()
            for token in excluded_dataset.split(',')
            if token.strip()))
        for excluded in dataset_str:
            assert excluded in sre_file_list or excluded == 'noise', \
                "Unknown excluded dataset with name: '%s'" % excluded
        if dataset_str:
            name += '.' + '_'.join(dataset_str)
    # ====== check save_path ====== #
    save_path = os.path.join(EXP_DIR, name)
    if os.path.exists(save_path) and IS_OVERRIDE:
        print("Override path:", ctext(save_path, 'yellow'))
        shutil.rmtree(save_path)
    if not os.path.exists(save_path):
        # makedirs: also create EXP_DIR itself when it is missing
        os.makedirs(save_path)
    # ====== return path ====== #
    log_path = get_logpath(name='log.txt', increasing=True,
                           odin_base=False, root=save_path)
    model_path = os.path.join(save_path, 'model.ai')
    if bool(logging):
        print("Model path:", ctext(model_path, 'cyan'))
        print("Log path:", ctext(log_path, 'cyan'))
    return save_path, model_path, log_path
# pp.validate_features(processor, # nb_samples=12, # path=os.path.join(EXP_DIR, 'feature_validation'), # override=True) ds = F.Dataset(PATH_ACOUSTIC_FEATURES, read_only=True) print(ds) indices = list(ds['indices_%s' % args.feat].items()) print("Utterances length:") print(" ", describe([end - start for name, (start, end) in indices], shorten=True)) # =========================================================================== # Basic path for GMM, T-matrix and I-vector # =========================================================================== EXP_DIR = os.path.join(EXP_DIR, '%s_%d_%d' % (FEAT, NMIX, TV_DIM)) LOG_PATH = get_logpath(name='log.txt', override=False, root=EXP_DIR, odin_base=False) stdio(LOG_PATH) print("Exp-dir:", ctext(EXP_DIR, 'cyan')) print("Log path:", ctext(LOG_PATH, 'cyan')) # ====== ivec path ====== # GMM_PATH = os.path.join(EXP_DIR, 'gmm') TMAT_PATH = os.path.join(EXP_DIR, 'tmat') # zero order statistics Z_PATH = (os.path.join(EXP_DIR, 'Z_train'), os.path.join(EXP_DIR, 'Z_test')) # first order statistics F_PATH = (os.path.join(EXP_DIR, 'F_train'), os.path.join(EXP_DIR, 'F_test')) # i-vector path I_PATH = (os.path.join(EXP_DIR, 'I_train'), os.path.join(EXP_DIR, 'I_test')) # labels L_PATH = ( # labels
# Script fragment: first-time (PIN based) Twitter authentication via Twython.
# NOTE: uses `cPickle` and `raw_input`, i.e. Python 2 only.
from __future__ import print_function, division, absolute_import

import os
import cPickle
import requests
import webbrowser
from twython import Twython

from odin import utils

# ===========================================================================
# Constants
# ===========================================================================
# Credentials are read from the environment; a missing variable raises
# KeyError at import time.
CONSUMER_KEY = os.environ['TWITTER_KEY']
CONSUMER_SECRET = os.environ['TWITTER_SECRET']
SAVE_PATH = utils.get_logpath('twitter.log', override=False)

# ===========================================================================
# First authentication
# ===========================================================================
# Only run the interactive flow when SAVE_PATH does not exist yet
# (presumably the tokens are stored there afterwards -- not visible here).
if not os.path.exists(SAVE_PATH):
    twitter = Twython(app_key=CONSUMER_KEY, app_secret=CONSUMER_SECRET)
    # NOTE(review): `user_timeline` is not used in the visible part.
    user_timeline = twitter.get_user_timeline(screen_name="NgoTrongTrung")
    auth = twitter.get_authentication_tokens()
    OAUTH_TOKEN = auth['oauth_token']
    OAUTH_TOKEN_SECRET = auth['oauth_token_secret']
    # ====== Getting the PIN using verifier URL ====== #
    # The browser is opened before the prompt is shown.
    webbrowser.open(auth['auth_url'])
    PIN = raw_input('We will open browser, copy the PIN code here:')
    # Only the length is validated, not that the PIN is numeric.
    if len(PIN) != 7:
        raise ValueError('PIN must be 7 numbers.')
#!/usr/bin/env python from __future__ import print_function, division, absolute_import import numpy as np from odin.utils import get_modelpath, ArgController, stdio, get_logpath stdio(get_logpath('tmp.log', override=True)) arg = ArgController(version=0.12).add( '-backend', 'theano or tensorflow', 'tensorflow').add('-ds', 'dataset cifar10, or mnist', 'mnist').add('-epoch', 'number of epoch', 3).add('-lr', 'learning rate', 0.01).parse() import os os.environ['ODIN'] = 'float32,gpu,%s,seed=12' % arg['backend'] from odin import backend as K from odin import nnet as N from odin import fuel, training from six.moves import cPickle # =========================================================================== # Load data # =========================================================================== USE_MNIST_DATA = True if 'mnist' in arg['ds'].lower() else False if USE_MNIST_DATA: ds = fuel.load_mnist()
def config_path(self):
    """Return the path of this object's '.cfg' file inside the log directory.

    The file name is the class name followed by `self._id` and the
    '.cfg' extension.
    """
    log_dir = get_logpath()
    cfg_name = '%s%s.cfg' % (self.__class__.__name__, self._id)
    return os.path.join(log_dir, cfg_name)
def _transform(self, X):
    """Run ``SMILExtract`` on one input and post-process its CSV output.

    Parameters
    ----------
    X : Mapping, string or numpy.ndarray
        - Mapping: may provide 'path' (audio file), 'sr' (sample rate)
          and 'raw' (raw signal)
        - string: path to an audio file
        - numpy.ndarray: raw signal, cached to a temporary wav file

    Return
    ------
    dict
        the value returned by `self._post_processing`

    Raises
    ------
    ValueError
        unsupported input type, sample rate conflicting with `self.sr`,
        or `_post_processing` did not return a dictionary
    RuntimeError
        unknown sample rate, or neither an existing audio file nor a
        raw array is available
    """
    # ====== find input file ====== #
    raw = None
    path = None
    if isinstance(X, Mapping):
        if 'path' in X:
            path = X['path']
        if 'sr' in X:
            if self.sr is None:
                # first sample rate seen: adopt it and generate the config
                self.sr = X['sr']
                self._update_config()
                self._first_config_generated = True
            elif self.sr != X['sr']:
                raise ValueError("Given sample rate: %d, but the audio file has "
                                 "sample rate: %d" % (self.sr, X['sr']))
        if 'raw' in X:
            raw = X['raw']
    elif is_string(X):
        path = X
    elif isinstance(X, np.ndarray):
        raw = X
    else:
        raise ValueError("openSMILE extractor require path to audio file.")
    # no sample rate specified, cannot generate appropriate config
    if self.sr is None:
        raise RuntimeError("Cannot acquire sample rate for the input.")
    # ====== first time generate config ====== #
    if not self._first_config_generated:
        self._first_config_generated = True
        self._update_config()
    # ====== extract SAD ====== #
    # integer bound: random.randint rejects float arguments (the former
    # `10e8`) with TypeError on recent Python versions
    unique_id = os.getpid() + random.randint(0, 10 ** 9)
    log_dir = get_logpath()
    inpath = os.path.join(
        log_dir, '%s%d.wav' % (self.__class__.__name__, unique_id))
    outpath = os.path.join(
        log_dir, '%s%d.csv' % (self.__class__.__name__, unique_id))
    try:
        if path is None or not os.path.exists(path):
            if raw is None:
                raise RuntimeError("openSMILE require input audio file, since "
                    "we cannot find any audio file, it is required to provide "
                    "raw array and sample rate, so the audio file will be cached.")
            from soundfile import write
            write(inpath, data=raw, samplerate=self.sr)
            path = inpath
        # Argument list instead of a shell string: paths containing spaces
        # or shell metacharacters reach SMILExtract unchanged.
        import subprocess
        command = ['SMILExtract',
                   '-loglevel', '%d' % self._log_level,
                   '-C', str(self.config_path),
                   '-I', str(path),
                   '-O', outpath]
        # exit status is not checked (as before); a failed run surfaces
        # when the output file cannot be read below
        subprocess.call(command)
        results = np.genfromtxt(outpath, dtype='float32',
                                delimiter=',', skip_header=0)
    except Exception:
        import traceback
        traceback.print_exc()
        raise  # bare raise keeps the original traceback
    finally:
        # always remove the temporary input/output files
        if os.path.exists(inpath):
            os.remove(inpath)
        if os.path.exists(outpath):
            os.remove(outpath)
    # ====== post-processing ====== #
    X_update = self._post_processing(results)
    if not isinstance(X_update, dict):
        raise ValueError("_post_processing must return a dictionary.")
    return X_update
# => Gaussian normalized is better, and float16 is no different from float32 # =========================================================================== from __future__ import print_function, division, absolute_import import numpy as np import os os.environ['ODIN'] = 'float32,gpu,theano,seed=12,cnmem=0.4' from odin import backend as K from odin import nnet as N from odin import fuel, training from odin.utils import get_modelpath, ArgController, stdio, get_logpath from six.moves import cPickle stdio(get_logpath('tmp.log')) # =========================================================================== # Load data # =========================================================================== ds = fuel.load_cifar10() print(ds) X_train = K.placeholder(shape=(None,) + ds['X_train'].shape[1:], name='X_train') X_score = K.placeholder(shape=(None,) + ds['X_train'].shape[1:], name='X_score') y = K.placeholder(shape=(None,), name='y', dtype='int32') # =========================================================================== # Build network # =========================================================================== ops = N.Sequence([
# Script fragment: application-only (OAuth 2) Twitter search via Twython.
from __future__ import print_function, division, absolute_import

import os
import requests
import webbrowser
from twython import Twython

from odin import utils

# Credentials are read from the environment; a missing variable raises
# KeyError at import time.
CONSUMER_KEY = os.environ['TWITTER_KEY']
CONSUMER_SECRET = os.environ['TWITTER_SECRET']
# NOTE(review): SAVE_PATH is not used in the visible part of the script.
SAVE_PATH = utils.get_logpath('twitter')
# print(auth)
# Obtain an access token first, then build a second client that uses it.
twitter = Twython(app_key=CONSUMER_KEY, app_secret=CONSUMER_SECRET,
                  oauth_version=2)
ACCESS_TOKEN = twitter.obtain_access_token()
twitter = Twython(CONSUMER_KEY, access_token=ACCESS_TOKEN)
search_results = twitter.search(q='WebsDotCom', count=50)
print(search_results)
# Script fragment: imports and log header of the scoring script.
from odin import preprocessing as pp
from odin import fuel as F, nnet as N, backend as K
from odin.utils import (get_module_from_path, get_script_path, ctext,
                        Progbar, stdio, get_logpath, get_formatted_datetime)
from odin.stats import describe

# NOTE(review): `get_logpath` is imported from odin.utils above and again
# from helpers here; the later (helpers) import is the binding used below.
from helpers import (SCORING_DATASETS, BACKEND_DATASETS,
                     SCORE_SYSTEM_NAME, SCORE_SYSTEM_ID,
                     N_PLDA, N_LDA, PLDA_MAXIMUM_LIKELIHOOD, PLDA_SHOW_LLK,
                     PATH_ACOUSTIC_FEATURES, FEATURE_RECIPE, FEATURE_NAME,
                     get_model_path, NCPU, get_logpath,
                     prepare_dnn_feeder_recipe, sre_file_list, Config,
                     EXP_DIR, VECTORS_DIR, RESULT_DIR,
                     filter_utterances)
# ====== scoring log ====== #
# The log path is requested under EXP_DIR and handed to stdio().
stdio(
    get_logpath(name='make_score.log', increasing=True,
                odin_base=False, root=EXP_DIR))
# Print a summary of the scoring configuration.
print('=' * 48)
print(get_formatted_datetime(only_number=False))
print("System name    :", SCORE_SYSTEM_NAME)
print("System id      :", SCORE_SYSTEM_ID)
print("Feature recipe :", FEATURE_RECIPE)
print("Feature name   :", FEATURE_NAME)
# BACKEND_DATASETS / SCORING_DATASETS expose .keys(); only the keys are shown
print("Backend dataset:", ','.join(BACKEND_DATASETS.keys()))
print("Scoring dataset:", ','.join(SCORING_DATASETS.keys()))
print('=' * 48)
# ===========================================================================
# Some helper
# ===========================================================================
# =========================================================================== from __future__ import print_function, division, absolute_import import numpy as np import os os.environ['ODIN'] = 'float32,gpu,theano,seed=12,cnmem=0.4' from odin import backend as K from odin import nnet as N from odin import fuel, training from odin.utils import get_modelpath, ArgController, stdio, get_logpath from six.moves import cPickle stdio(get_logpath('tmp.log')) # =========================================================================== # Load data # =========================================================================== ds = fuel.load_cifar10() print(ds) X_train = K.placeholder(shape=(None, ) + ds['X_train'].shape[1:], name='X_train') X_score = K.placeholder(shape=(None, ) + ds['X_train'].shape[1:], name='X_score') y = K.placeholder(shape=(None, ), name='y', dtype='int32') # =========================================================================== # Build network
from sklearn.metrics import accuracy_score, log_loss, f1_score from odin import fuel as F from odin import nnet as N, backend as K from odin import visual as V from odin.utils import (ctext, mpi, Progbar, catch_warnings_ignore, stdio, get_logpath, catch_warnings_ignore) from helpers import (FEATURE_RECIPE, FEATURE_NAME, PATH_ACOUSTIC_FEATURES, MINIMUM_UTT_DURATION, ANALYSIS_DIR, Config, filter_utterances, prepare_dnn_data) # ====== prepare log ====== # stdio(get_logpath(name="analyze_data.log", increasing=True, odin_base=False, root=ANALYSIS_DIR)) print(ctext(FEATURE_RECIPE, 'lightyellow')) print(ctext(FEATURE_NAME, 'lightyellow')) assert os.path.isdir(os.path.join(PATH_ACOUSTIC_FEATURES, FEATURE_RECIPE)) # ====== essential path ====== # figure_path = os.path.join(ANALYSIS_DIR, '%s_%s.pdf' % (FEATURE_RECIPE.replace('_', ''), FEATURE_NAME)) print(ctext(figure_path, 'lightyellow')) # =========================================================================== # Load the data # =========================================================================== ds = F.Dataset(os.path.join(PATH_ACOUSTIC_FEATURES, FEATURE_RECIPE), read_only=True) X = ds[FEATURE_NAME] # remove all noise data indices = {name: (start, end)
# ====== import ====== # import os os.environ['ODIN'] = 'float32,%s,%s' % (args['dev'], args['bk']) import numpy as np np.random.seed(1208) from odin import nnet as N, backend as K, fuel as F, stats from odin.utils import get_modelpath, stdio, get_logpath, get_datasetpath from odin.basic import has_roles, BIAS, WEIGHT from odin import training # set log path stdio(path=get_logpath('digit_audio.log', override=True)) # =========================================================================== # Get wav and process new dataset configuration # =========================================================================== # ====== process new features ====== # if False: datapath = F.load_digit_wav() output_path = get_datasetpath(name='digit', override=True) feat = F.SpeechProcessor(datapath, output_path, audio_ext='wav', sr_new=8000, win=0.025, shift=0.01, nb_melfilters=40,
def cache_path(self):
    """Return the log path named after this object's class.

    The class name is passed to `utils.get_logpath` with `override=False`.
    """
    class_name = self.__class__.__name__
    return utils.get_logpath(class_name, override=False)
# Script fragment: argument parsing, paths, constants and data loading for
# the CIFAR-10 example.
# NOTE(review): `ArgController`, `stdio` and `one_hot` are used below but
# their imports are not visible in this fragment.
args = ArgController(
).add('-model', 'model name, specified in models_cifar.py', 'cnn'
).parse()

import os
# Set before the odin imports below; presumably read by odin at import
# time -- confirm.
os.environ['ODIN'] = 'float32,gpu,seed=87654321,log'

import numpy as np
import tensorflow as tf

from odin import fuel as F, nnet as N, backend as K, training, utils
from odin.stats import train_valid_test_split

# Model and log paths are both derived from the selected model name.
MODEL_NAME = args.model
MODEL_PATH = utils.get_modelpath(name='cifar10_%s' % MODEL_NAME,
                                 override=True)
LOG_PATH = utils.get_logpath(name='cifar10_%s.log' % MODEL_NAME,
                             override=True)
stdio(LOG_PATH)
# ===========================================================================
# Some handmade constants
# ===========================================================================
NB_EPOCH = 10
LEARNING_RATE = 0.001
# ===========================================================================
# Load dataset
# ===========================================================================
ds = F.CIFAR10.get_dataset()
nb_labels = 10
print(ds)
# Inputs are cast to float32 and divided by 255; labels are one-hot
# encoded with `nb_labels` classes.
X_train = ds['X_train'][:].astype('float32') / 255.
y_train = one_hot(ds['y_train'][:], nb_classes=nb_labels)
X_test = ds['X_test'][:].astype('float32') / 255.
# Script fragment: imports, constants and dataset check of the digits example.
# NOTE(review): `os` is used below but its import is not visible here.
from odin import training
from odin import preprocessing as pp
from odin.ml import evaluate, fast_tsne
from odin.visual import (print_dist, print_confusion, print_hist,
                         plot_scatter, plot_figure, plot_spectrogram,
                         plot_save, plot_confusion_matrix,
                         generate_random_colors, generate_random_marker)
from odin.utils import (get_logpath, get_modelpath, get_datasetpath,
                        get_figpath, Progbar, unique_labels, chain,
                        as_tuple_of_shape, stdio, ctext, ArgController)
# ===========================================================================
# Const
# ===========================================================================
FEAT = ['mspec', 'sad']
# All three paths are requested with override=True.
MODEL_PATH = get_modelpath(name='DIGITS', override=True)
LOG_PATH = get_logpath(name='digits.log', override=True)
FIG_PATH = get_figpath(name='DIGITS', override=True)
stdio(LOG_PATH)

DEBUG = False
# ====== training ====== #
BATCH_SIZE = 32
NB_EPOCH = 20
NB_SAMPLES = 8
VALID_PERCENTAGE = 0.4
# ===========================================================================
# Load dataset
# ===========================================================================
# The preprocessed features must already exist as a directory; the
# assertion message points at the script expected to create them.
path = get_datasetpath(name='TIDIGITS_feats', override=False)
assert os.path.isdir(path), \
    "Cannot find preprocessed feature at: %s, try to run 'odin/examples/features.py'" % path
# Script fragment: imports, logging and data loading of the data analysis
# script.
# NOTE(review): `os` is used below but its import is not visible here.
from sklearn.metrics import accuracy_score, log_loss, f1_score

from odin import fuel as F
from odin import nnet as N, backend as K
from odin import visual as V
# NOTE(review): `catch_warnings_ignore` is listed twice in this import.
from odin.utils import (ctext, mpi, Progbar, catch_warnings_ignore, stdio,
                        get_logpath, catch_warnings_ignore)

from helpers import (FEATURE_RECIPE, FEATURE_NAME, PATH_ACOUSTIC_FEATURES,
                     MINIMUM_UTT_DURATION, ANALYSIS_DIR, Config,
                     filter_utterances, prepare_dnn_data)
# ====== prepare log ====== #
# The log path is requested under ANALYSIS_DIR and handed to stdio().
stdio(
    get_logpath(name="analyze_data.log", increasing=True,
                odin_base=False, root=ANALYSIS_DIR))
print(ctext(FEATURE_RECIPE, 'lightyellow'))
print(ctext(FEATURE_NAME, 'lightyellow'))
# The feature recipe must exist as a directory under PATH_ACOUSTIC_FEATURES.
assert os.path.isdir(os.path.join(PATH_ACOUSTIC_FEATURES, FEATURE_RECIPE))
# ====== essential path ====== #
# PDF named '<recipe without underscores>_<feature name>.pdf'
figure_path = os.path.join(
    ANALYSIS_DIR,
    '%s_%s.pdf' % (FEATURE_RECIPE.replace('_', ''), FEATURE_NAME))
print(ctext(figure_path, 'lightyellow'))
# ===========================================================================
# Load the data
# ===========================================================================
ds = F.Dataset(os.path.join(PATH_ACOUSTIC_FEATURES, FEATURE_RECIPE),
               read_only=True)
X = ds[FEATURE_NAME]
stop_on_failure=True) processor.run() # pp.validate_features(processor, # nb_samples=12, # path=os.path.join(EXP_DIR, 'feature_validation'), # override=True) ds = F.Dataset(PATH_ACOUSTIC_FEATURES, read_only=True) print(ds) indices = list(ds['indices_%s' % args.feat].items()) print("Utterances length:") print(" ", describe([end - start for name, (start, end) in indices], shorten=True)) # =========================================================================== # Basic path for GMM, T-matrix and I-vector # =========================================================================== EXP_DIR = os.path.join(EXP_DIR, '%s_%d_%d' % (FEAT, NMIX, TV_DIM)) LOG_PATH = get_logpath(name='log.txt', override=False, root=EXP_DIR, odin_base=False) stdio(LOG_PATH) print("Exp-dir:", ctext(EXP_DIR, 'cyan')) print("Log path:", ctext(LOG_PATH, 'cyan')) # ====== ivec path ====== # GMM_PATH = os.path.join(EXP_DIR, 'gmm') TMAT_PATH = os.path.join(EXP_DIR, 'tmat') # zero order statistics Z_PATH = ( os.path.join(EXP_DIR, 'Z_train'), os.path.join(EXP_DIR, 'Z_test')) # first order statistics F_PATH = ( os.path.join(EXP_DIR, 'F_train'), os.path.join(EXP_DIR, 'F_test')) # i-vector path