Пример #1
0
    def __init__(self, ticker, chunks=9, delta=0, n_hidden_states=5,
                 n_latency_days=10, n_steps_frac_change=10,
                 n_steps_frac_high=30, n_steps_frac_low=10, n_iter=100,
                 verbose=False, prediction_date=None):
        """Record the configuration, create the HMM and prepare the data.

        Args:
            ticker: Stored as ``self.ticker``.
            chunks: Stored as ``self.chunks``.
            delta: Stored as ``self.delta``.
            n_hidden_states: Passed to ``GMMHMM`` as ``n_components``.
            n_latency_days: Stored as ``self.n_latency_days``.
            n_steps_frac_change: First argument forwarded to
                ``_compute_all_possible_outcomes``.
            n_steps_frac_high: Second argument forwarded to
                ``_compute_all_possible_outcomes``.
            n_steps_frac_low: Third argument forwarded to
                ``_compute_all_possible_outcomes``.
            n_iter: Passed to ``GMMHMM`` as ``n_iter``.
            verbose: Stored as ``self.verbose``.
            prediction_date: Stored as ``self.prediction_date``.
        """
        # Plain settings are stored before the helper calls at the end run.
        self.ticker = ticker
        self.verbose = verbose
        self.chunks = chunks
        self.delta = delta
        self.n_latency_days = n_latency_days
        self.prediction_date = prediction_date
        self.total_score = 0

        self.hmm = GMMHMM(n_iter=n_iter, n_components=n_hidden_states)

        self.fetch_training_data()
        outcome_steps = (n_steps_frac_change,
                         n_steps_frac_high,
                         n_steps_frac_low)
        self._compute_all_possible_outcomes(*outcome_steps)
Пример #2
0
def trainModelGMM(X, lengths, states, num_gaus):
    """Fit a GMM-HMM to ``X`` and print diagnostics about the fit.

    Args:
        X: Observations handed to ``GMMHMM.fit``; presumably all sequences
            stacked row-wise -- TODO confirm against the caller.
        lengths: Per-sequence lengths that accompany ``X``.
        states: Number of hidden states (``n_components``).
        num_gaus: Number of mixture components per state (``n_mix``).

    Returns:
        The fitted ``GMMHMM`` instance.
    """
    model = GMMHMM(n_components=states, n_mix=num_gaus,
                   n_iter=1000, verbose=True)
    model.fit(X, lengths)

    print('Mixture Models + HMM')
    # Decode with the same sequence boundaries used for fitting and scoring;
    # without ``lengths`` the rows would be decoded as one long sequence.
    print(model.predict(X, lengths))
    print(model.monitor_.converged)
    print(model.monitor_)
    print(model.score(X, lengths))
    return model
Пример #3
0
    def _defineModelParameters(self):
        """Create the HMM and store it on ``self.model``."""
        # A GaussianHMM with the same settings but n_iter=200 is the
        # disabled alternative to the GMMHMM built below.
        settings = {
            "n_components": 2,
            "covariance_type": "full",
            "n_iter": 20,
            "verbose": True,
        }
        self.model = GMMHMM(**settings)
Пример #4
0
class HMMStats(Stats):
    """``Stats`` subclass holding two GMM-HMMs: one for onsets, one for durations."""

    def __init__(self, stats: Stats = None):
        """Create the two (untrained) HMMs.

        Args:
            stats: Optional instance whose ``__dict__`` is deep-copied into
                this object before the HMMs are created.
        """
        super().__init__()

        if stats:
            copied_state = deepcopy(stats.__dict__)
            self.__dict__.update(copied_state)

        # Settings shared by both models.
        shared = dict(
            covariance_type='diag',
            n_iter=100,  # upper bound on the number of EM iterations
            tol=0.1,  # EM stops once the log-likelihood gain falls below this
            verbose=True,
        )
        # n_components is the number of hidden states; n_mix is the number of
        # Gaussian mixture components per state.
        self.onshmm = GMMHMM(n_components=20, n_mix=30,
                             random_state=self.seed(), **shared)
        self.durhmm = GMMHMM(n_components=2, n_mix=3,
                             random_state=self.seed(), **shared)

    def get_random_onset_diff(self, k=1):
        """Sample ``k`` observations from the onset HMM; return their first column."""
        observations, _hidden = self.onshmm.sample(k, random_state=self.seed())
        return observations[:, 0]

    def get_random_duration_ratio(self, k=1):
        """Sample ``k`` observations from the duration HMM; return their first column."""
        observations, _hidden = self.durhmm.sample(k, random_state=self.seed())
        return observations[:, 0]

    def train_on_filled_stats(self):
        """Run the parent training, then fit the duration and onset HMMs."""
        super().train_on_filled_stats()

        def _fit_and_report(model, observations, seq_lengths):
            # Fit one model and print whether its monitor reports convergence.
            model.fit(observations, seq_lengths)
            if model.monitor_.converged:
                outcome = "hmm converged!"
            else:
                outcome = "hmm did not converge!"
            print(outcome)

        print("Training duration hmm...")
        _fit_and_report(self.durhmm, self.dur_ratios, self.dur_lengths)
        print("Training onset hmm...")
        _fit_and_report(self.onshmm, self.ons_diffs, self.ons_lengths)

    def __repr__(self):
        """Return the string form of this object's type."""
        own_type = type(self)
        return str(own_type)
    def __init__(self, _model):
        """Build the wrapped ``GMMHMM`` from a nested parameter dictionary.

        ``_model`` must provide ``'hmmParams'`` (keys ``transMat``,
        ``transMatPrior``, ``nComponent``, ``startProb``, ``startProbPrior``)
        and ``'gmmParams'`` (keys ``nMix``, ``covarianceType`` and the
        optional list ``gmms``). ``'nIter'`` is optional and defaults to 50.
        """
        super(GMMHMMTrainer, self).__init__(_model)

        hmm_cfg = _model['hmmParams']
        gmm_cfg = _model['gmmParams']

        # Values are read in a fixed order; list-valued entries become arrays.
        hmm_kwargs = {
            'n_iter': _model.get('nIter', 50),
            'transmat': np.array(hmm_cfg['transMat']),
            'transmat_prior': np.array(hmm_cfg['transMatPrior']),
            'n_components': hmm_cfg['nComponent'],
            'startprob': np.array(hmm_cfg['startProb']),
            'startprob_prior': np.array(hmm_cfg['startProbPrior']),
            'n_mix': gmm_cfg['nMix'],
            'covariance_type': gmm_cfg['covarianceType'],
        }

        # Rebuild GMM objects only when mixture descriptions were supplied;
        # otherwise ``None`` is handed to GMMHMM.
        mixture_specs = gmm_cfg.get('gmms', None)
        restored = None
        if mixture_specs:
            restored = []
            for spec in mixture_specs:
                mixture = GMM(n_components=spec['nComponent'],
                              covariance_type=spec['covarianceType'])
                mixture.covars_ = np.array(spec['covars'])
                mixture.means_ = np.array(spec['means'])
                mixture.weights_ = np.array(spec['weights'])
                restored.append(mixture)

        self.gmmhmm = GMMHMM(gmms=restored, **hmm_kwargs)
Пример #6
0
    def __init__(self,
                 n_components=N_COMPONENTS,
                 n_mix=N_MIX,
                 n_iters=N_ITERS,
                 cov_type=COV_TYPE,
                 algorithm=ALGORITHM,
                 tol=TOL,
                 startpr_prior=STARTPROB_PRIOR,
                 trans_prior=TRANSMAT_PRIOR,
                 init_params=INIT_PARAMS,
                 params=PARAMS,
                 verbose=VERBOSE,
                 class_names=CONDITION_CLASSES):
        """Create one untrained ``GMMHMM`` per class name.

        The models are stored in ``self.model``, keyed by class name. Every
        argument is forwarded to ``GMMHMM``; the module-level constants only
        act as defaults.

        Args:
            n_components: Forwarded as ``n_components``.
            n_mix: Forwarded as ``n_mix``.
            n_iters: Forwarded as ``n_iter``.
            cov_type: Forwarded as ``covariance_type``.
            algorithm: Forwarded as ``algorithm``.
            tol: Forwarded as ``tol``.
            startpr_prior: Forwarded as ``startprob_prior``.
            trans_prior: Forwarded as ``transmat_prior``.
            init_params: Forwarded as ``init_params``.
            params: Forwarded as ``params``.
            verbose: Forwarded as ``verbose``.
            class_names: Iterable of keys; one model is built per entry.
        """
        self.model = {
            class_name: GMMHMM(n_components=n_components,
                               n_mix=n_mix,
                               n_iter=n_iters,
                               covariance_type=cov_type,
                               algorithm=algorithm,
                               tol=tol,
                               transmat_prior=trans_prior,
                               startprob_prior=startpr_prior,
                               init_params=init_params,
                               params=params,
                               verbose=verbose)
            for class_name in class_names
        }
Пример #7
0
def initialize(phones, n_states=3, n_mix=5):
    """Create one left-to-right GMM-HMM per key of ``phones``.

    Each model starts in state 0 and, from state ``i``, moves with equal
    probability to any state ``j >= i``. ``init_params='mc'`` leaves the
    start and transition probabilities out of hmmlearn's initialisation, so
    the values assigned here are the ones training starts from.

    Args:
        phones: Object whose ``keys()`` name the models to create.
        n_states: Number of hidden states per model (default 3).
        n_mix: Number of mixture components per state (default 5).

    Returns:
        dict mapping each key of ``phones`` to its own ``GMMHMM``.
    """
    start_prob = np.zeros(n_states)
    start_prob[0] = 1.0

    transmat = np.zeros((n_states, n_states))
    for i in range(n_states):
        # Float literal keeps this a true division on Python 2 as well.
        transmat[i, i:] = 1.0 / (n_states - i)

    models = {}
    for phone in phones.keys():
        new_hmm = GMMHMM(n_components=n_states,
                         n_mix=n_mix,
                         params='tmc',
                         init_params='mc')
        # Each model gets private copies so that an in-place change to one
        # model's parameters cannot leak into the others.
        new_hmm.startprob_ = start_prob.copy()
        new_hmm.transmat_ = transmat.copy()
        models[phone] = new_hmm
    return models
Пример #8
0
def classifyByGMMHMM(seq, models, configs):
    """Score ``seq`` with every serialised GMM-HMM in ``models``.

    ``seq`` is encoded once per entry of ``configs`` through a ``Dataset``
    built from that config; the i-th model is then scored on the i-th
    encoding.

    Args:
        seq: Sequence handed to ``Dataset._convetNumericalSequence``.
        models: Iterable of dicts with the keys ``nMix``, ``nComponent``,
            ``covarianceType``, ``hmmParams`` (``startProb``, ``transMat``)
            and ``gmmParams`` (``params``: list of dicts with ``covars``,
            ``means`` and ``weights``).
        configs: Iterable of dicts with the keys ``logType``, ``eventType``,
            ``motionType``, ``soundType`` and ``locationType``.

    Returns:
        list with one ``score`` result per model.
    """
    # One numeric encoding of ``seq`` per config.
    encoded = []
    for cfg in configs:
        dataset = Dataset(
            rawdata_type=cfg["logType"],
            event_type=cfg["eventType"],
            motion_type=cfg["motionType"],
            sound_type=cfg["soundType"],
            location_type=cfg["locationType"],
        )
        encoded.append(np.array(dataset._convetNumericalSequence(seq)))

    def _restore_mixture(owner, spec):
        # Rebuild one GMM from its stored covariances, means and weights.
        mixture = GMM(n_components=owner["nMix"],
                      covariance_type=owner["covarianceType"])
        mixture.covars_ = np.array(spec["covars"])
        mixture.means_ = np.array(spec["means"])
        mixture.weights_ = np.array(spec["weights"])
        return mixture

    restored = []
    for description in models:
        mixtures = [_restore_mixture(description, spec)
                    for spec in description["gmmParams"]["params"]]
        restored.append(GMMHMM(
            n_components=description["nComponent"],
            n_mix=description["nMix"],
            startprob=np.array(description["hmmParams"]["startProb"]),
            transmat=np.array(description["hmmParams"]["transMat"]),
            gmms=mixtures,
            covariance_type=description["covarianceType"],
        ))

    # Model i is evaluated on the encoding produced from config i.
    return [hmm.score(encoded[idx]) for idx, hmm in enumerate(restored)]
Пример #9
0
    def __init__(self, ticker, n_hidden_states=5, n_latency_days=10,
                 n_steps_frac_change=50, n_steps_frac_high=30,
                 n_steps_frac_low=10, n_iter=1000, verbose=False):
        """Record the configuration, create the HMM and load the data.

        Args:
            ticker: Stored as ``self.ticker``.
            n_hidden_states: Passed to ``GMMHMM`` as ``n_components``.
            n_latency_days: Stored as ``self.n_latency_days``.
            n_steps_frac_change: First argument forwarded to the
                outcome-computation helper.
            n_steps_frac_high: Second argument forwarded to that helper.
            n_steps_frac_low: Third argument forwarded to that helper.
            n_iter: Passed to ``GMMHMM`` as ``n_iter``.
            verbose: Stored as ``self.verbose``.
        """
        self.ticker = ticker
        self.verbose = verbose
        self.n_latency_days = n_latency_days

        self.hmm = GMMHMM(n_iter=n_iter, n_components=n_hidden_states)

        # Training data first, then the latest data (the data to predict on).
        self.fetch_training_data()
        self.fetch_latest_data()

        # NOTE(review): other snippets in this file call this helper
        # `_compute_all_possible_outcomes`; confirm that `allall` is the
        # name this class really defines.
        outcome_steps = (n_steps_frac_change,
                         n_steps_frac_high,
                         n_steps_frac_low)
        self._compute_allall_possible_outcomes(*outcome_steps)
Пример #10
0
    def __init__(self, _model):
        """Create ``self.gmmhmm`` from the parameter dictionary ``_model``.

        Reads ``_model['hmmParams']`` (``transMat``, ``transMatPrior``,
        ``nComponent``, ``startProb``, ``startProbPrior``),
        ``_model['gmmParams']`` (``nMix``, ``covarianceType``, optional
        ``gmms``) and the optional ``_model['nIter']`` (default 50).
        """
        super(GMMHMMTrainer, self).__init__(_model)

        hmm_section = _model['hmmParams']
        gmm_section = _model['gmmParams']
        max_iter = _model.get('nIter', 50)

        trans = np.array(hmm_section['transMat'])
        trans_prior = np.array(hmm_section['transMatPrior'])
        state_count = hmm_section['nComponent']
        start = np.array(hmm_section['startProb'])
        start_prior = np.array(hmm_section['startProbPrior'])

        mix_count = gmm_section['nMix']
        cov_kind = gmm_section['covarianceType']
        stored_gmms = gmm_section.get('gmms', None)

        def _rebuild(spec):
            # Turn one stored mixture description back into a GMM object.
            mixture = GMM(n_components=spec['nComponent'],
                          covariance_type=spec['covarianceType'])
            mixture.covars_ = np.array(spec['covars'])
            mixture.means_ = np.array(spec['means'])
            mixture.weights_ = np.array(spec['weights'])
            return mixture

        # A missing or empty mixture list is passed on as ``None``.
        if stored_gmms:
            rebuilt = [_rebuild(spec) for spec in stored_gmms]
        else:
            rebuilt = None

        self.gmmhmm = GMMHMM(
            n_components=state_count,
            n_mix=mix_count,
            gmms=rebuilt,
            n_iter=max_iter,
            covariance_type=cov_kind,
            transmat=trans,
            transmat_prior=trans_prior,
            startprob=start,
            startprob_prior=start_prior,
        )
Пример #11
0
    def __init__(self, stats: Stats = None):
        """Create the two (untrained) GMM-HMMs ``onshmm`` and ``durhmm``.

        Args:
            stats: Optional instance whose ``__dict__`` is deep-copied into
                this object before the HMMs are created.
        """
        super().__init__()

        if stats:
            inherited = deepcopy(stats.__dict__)
            self.__dict__.update(inherited)

        def _new_hmm(n_states, n_gaussians):
            # n_components is the number of hidden states, n_mix the number
            # of Gaussian mixture components per state. EM runs for at most
            # n_iter iterations and stops early once the log-likelihood gain
            # falls below tol.
            return GMMHMM(n_components=n_states,
                          n_mix=n_gaussians,
                          covariance_type='diag',
                          n_iter=100,
                          tol=0.1,
                          verbose=True,
                          random_state=self.seed())

        self.onshmm = _new_hmm(20, 30)
        self.durhmm = _new_hmm(2, 3)
Пример #12
0
def train_GMMs(emotions, trng_data, GMM_config, pickle_path, use_pickle = False):
    """Train one GMMHMM per emotion, or load previously pickled models.

    Args:
        emotions: Iterable of keys; one model is created per entry.
        trng_data: Mapping from emotion to its training data. A model is
            only fitted when its entry is truthy.
        GMM_config: Mapping from emotion to a dict with the keys
            ``"n_components"`` and ``"n_mix"``.
        pickle_path: File the models are written to after training, or read
            from when ``use_pickle`` is true.
        use_pickle: When true, skip training and return the pickled models.

    Returns:
        dict mapping each emotion to its GMMHMM object.
    """
    if use_pickle:
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        with open(pickle_path, "rb") as pickle_file:
            return pickle.load(pickle_file)

    emo_machines = {}
    for emo in emotions:
        config = GMM_config[emo]
        emo_machines[emo] = GMMHMM(n_components=config["n_components"],
                                   n_mix=config["n_mix"])
        if trng_data[emo]:
            emo_machines[emo].fit(trng_data[emo])

    # The context manager guarantees the file is flushed and closed.
    with open(pickle_path, "wb") as pickle_file:
        pickle.dump(emo_machines, pickle_file)
    return emo_machines
Пример #13
0
def hmm_train(features, n_components=30, n_mix=8):
    """Fit a left-to-right GMM-HMM to ``features`` and print the decoded states.

    The model starts in state 0. Every state stays put with probability 0.95
    and advances to the next state with probability 0.05; the last state is
    absorbing.

    Args:
        features: Observations passed to ``fit`` and ``predict``.
        n_components: Number of hidden states (default 30).
        n_mix: Number of mixture components per state (default 8).

    Returns:
        Tuple ``(model, preds)``: the fitted ``GMMHMM`` and the state
        sequence predicted for ``features``.
    """
    # 's' and 't' are left out of init_params so that fit() keeps the start
    # distribution and transition matrix assigned below instead of
    # re-initialising them.
    gmmhmm = GMMHMM(n_components=n_components, n_mix=n_mix, init_params='mcw')

    startprob = np.zeros(n_components)
    startprob[0] = 1.0
    gmmhmm.startprob_ = startprob

    transmat = np.identity(n_components) * 0.95
    step = np.arange(n_components - 1)
    transmat[step, step + 1] = 0.05
    transmat[-1, -1] = 1
    gmmhmm.transmat_ = transmat

    gmmhmm.fit(features)
    preds = gmmhmm.predict(features)
    print(preds)
    return gmmhmm, preds
Пример #14
0
def fit_HMM(feedback, n_components=2, hmm='GaussianHMM'):
    """Fit an HMM to the second column of ``feedback``.

    Args:
        feedback: 2-D array; column 1 is used as the observation series.
        n_components: Number of hidden states.
        hmm: ``'GaussianHMM'`` or ``'GMMHMM'``; selects the model class.

    Returns:
        Tuple ``(y, states, mus, sigmas, P)``: the observation series, the
        predicted state per observation, the model's ``means_``, the square
        roots of the diagonal of every entry of ``covars_``, and the
        transition matrix.

    Raises:
        AssertionError: If ``hmm`` is not one of the two supported names.
    """
    assert hmm in ('GaussianHMM', 'GMMHMM'), "You have to choose between GaussianHMM or GMMHMM"

    y = feedback[:, 1]
    observations = y.reshape(len(y), 1)

    if hmm == 'GaussianHMM':
        model = GaussianHMM(n_components=n_components)
    else:
        model = GMMHMM(n_components=n_components)

    model.fit(observations)

    states = model.predict(observations)
    mus = np.array(model.means_)
    # One row per state, for any n_components (not only the first two).
    # NOTE(review): np.diag assumes each covars_ entry is a square matrix;
    # confirm the layout for GMMHMM.
    sigmas = np.sqrt(np.array([np.diag(cov) for cov in model.covars_]))
    P = np.array(model.transmat_)

    return y, states, mus, sigmas, P
Пример #15
0
def get_GMMs(labels, trng_data=None, GMM_config=None,
             model_path="models/gmmhmm.pkl",
             from_file=False):
    """Train one GMMHMM per label, or load previously pickled models.

    Args:
        labels: Iterable of keys; one model is created per entry.
        trng_data: Mapping from label to a sequence of arrays. They are
            stacked with ``np.vstack`` before fitting; a model is only
            fitted when its entry is truthy. Required when training.
        GMM_config: Mapping from label to a dict with the keys
            ``"n_components"`` and ``"n_mix"``. Required when training.
        model_path: File the models are written to after training, or read
            from when ``from_file`` is true.
        from_file: When true, skip training and return the pickled models.

    Returns:
        dict mapping each label to its GMMHMM object.
    """
    if from_file:
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        with open(model_path, "rb") as model_file:
            return pickle.load(model_file)

    gmms = {}
    for label in labels:
        config = GMM_config[label]
        gmms[label] = GMMHMM(n_components=config["n_components"],
                             n_mix=config["n_mix"])
        if trng_data[label]:
            # NOTE(review): the stacked rows are fitted as a single sequence
            # because no ``lengths`` are passed -- confirm this is intended.
            gmms[label].fit(np.vstack(trng_data[label]))

    # The context manager guarantees the file is flushed and closed.
    with open(model_path, "wb") as model_file:
        pickle.dump(gmms, model_file)
    return gmms
Пример #16
0
    def __init__(self, _models):
        """Build one ``GMMHMM`` per label from serialised parameters.

        ``_models`` maps each label to a dict with the keys ``'param'`` and
        ``'status_set'``. ``'param'`` holds ``'hmmParams'`` (``transMat``,
        ``transMatPrior``, ``nComponent``, ``startProb``,
        ``startProbPrior``), ``'gmmParams'`` (``nMix``, ``covarianceType``,
        optional ``gmms``) and the optional ``'nIter'`` (default 50).

        The result is stored in ``self.gmmhmms`` as
        ``{label: {'gmmhmm': model, 'status_set': ...}}``.
        """
        super(GMMHMMClassifier, self).__init__(_models)
        self.gmmhmms = {}
        self.predict_data_ = None

        # ``items()`` exists on both Python 2 and 3; ``iteritems()`` is
        # Python 2 only.
        for label, value in _models.items():
            _model = value['param']
            hmm_params = _model['hmmParams']
            gmm_params = _model['gmmParams']
            n_iter = _model.get('nIter', 50)

            transmat = np.array(hmm_params['transMat'])
            transmat_prior = np.array(hmm_params['transMatPrior'])
            n_component = hmm_params['nComponent']
            startprob = np.array(hmm_params['startProb'])
            startprob_prior = np.array(hmm_params['startProbPrior'])

            n_mix = gmm_params['nMix']
            covariance_type = gmm_params['covarianceType']
            gmms = gmm_params.get('gmms', None)

            # A missing or empty mixture list is passed on as ``None``.
            gmm_obj_list = None
            if gmms:
                gmm_obj_list = []
                for gmm in gmms:
                    gmm_obj = GMM(n_components=gmm['nComponent'],
                                  covariance_type=gmm['covarianceType'])
                    gmm_obj.covars_ = np.array(gmm['covars'])
                    gmm_obj.means_ = np.array(gmm['means'])
                    gmm_obj.weights_ = np.array(gmm['weights'])
                    gmm_obj_list.append(gmm_obj)

            gmmhmm = GMMHMM(n_components=n_component, n_mix=n_mix, gmms=gmm_obj_list,
                            n_iter=n_iter, covariance_type=covariance_type,
                            transmat=transmat, transmat_prior=transmat_prior,
                            startprob=startprob, startprob_prior=startprob_prior)
            self.gmmhmms[label] = {'gmmhmm': gmmhmm, 'status_set': value['status_set']}
Пример #17
0
 def get_trained_pipelines(train):
     """Fit one scaler / PCA / GMM-HMM pipeline per subset of ``train``.

     ``train`` is split into ``n_subsets`` parts. For every part a pipeline
     is fitted on that part's ``features`` columns and then used to predict
     a state for every row of ``train``; the prediction is written to
     ``train['state']``, so the caller's frame is modified. The mean
     ``'return'`` per state, sorted ascending, is stored with a
     ``new_state`` rank and a running ``name``.

     A subset whose pipeline fails is logged and skipped.

     Returns:
         list of ``[pipeline, results]`` pairs, one per successful subset.
     """
     import logging

     logger = logging.getLogger(__name__)
     pipelines = []
     for train_subset in np.array_split(train, n_subsets):
         try:
             pipe_pca = make_pipeline(
                 StandardScaler(),
                 PrincipalComponentAnalysis(n_components=n_components),
                 GMMHMM(n_components=n_components, covariance_type='full',
                        n_iter=150, random_state=7),
             )
             pipe_pca.fit(train_subset[features])
             train['state'] = pipe_pca.predict(train[features])
             results = pd.DataFrame(
                 train.groupby(by=['state'])['return'].mean().sort_values())
             results['new_state'] = list(range(n_components))
             results.columns = ['mean', 'new_state']
             results = results.reset_index()
             # Successful pipelines are numbered consecutively from 0.
             results['name'] = len(pipelines)
         except Exception:
             # Best effort: keep going with the other subsets, but leave a
             # trace of what went wrong instead of discarding it silently.
             logger.warning('skipping subset: pipeline training failed',
                            exc_info=True)
             continue
         pipelines.append([pipe_pca, results])

     return pipelines
Пример #18
0
samples_raw_2, labels_2, _ = FileReader.read(FILE_PATH_2)
# Keep only the rows of the second file that are labelled 0 or 1.
binary_mask = (labels_2.ravel() == 0) | (labels_2.ravel() == 1)
samples_raw_2 = samples_raw_2[binary_mask, :]
labels_2 = labels_2[binary_mask]

samples_raw_3, labels_3, _ = FileReader.read(FILE_PATH_3)
# Only the first six columns of the third file are used.
samples_raw_3 = samples_raw_3[:, 0:6]

window_size = 100

X_train, X_test, y_train, y_test = train_test_split(np.vstack([samples_raw_1, samples_raw_2, samples_raw_3]),
                                                    np.vstack([labels_1, labels_2, labels_3]), train_size=0.6)

# Label 0 is treated as healthy, label 1 as unhealthy.
samples_healthy = X_train[y_train.ravel() == 0, :]
samples_unhealthy = X_train[y_train.ravel() == 1, :]

# One model per class, each fitted on that class's windows only.
model_healthy = GMMHMM()
model_unhealthy = GMMHMM()

seqs, lengths = PreProcessor.split2sequences(samples_healthy, window_size)
model_healthy.fit(seqs, lengths)

seqs, lengths = PreProcessor.split2sequences(samples_unhealthy, window_size)
model_unhealthy.fit(seqs, lengths)

seqs, lengths = PreProcessor.split2sequences(X_test, window_size)
accuracy = 0
for i in range(len(lengths)):
    # NOTE(review): this slicing assumes every test sequence is exactly
    # ``window_size`` rows long -- confirm against split2sequences.
    window = seqs[i * window_size:(i + 1) * window_size, :]
    ll_healthy, post_healthy = model_healthy.score_samples(window)
    ll_unhealthy, post_unhealthy = model_unhealthy.score_samples(window)

    # Show both log-likelihoods side by side: healthy | unhealthy.
    print("[" + str(ll_healthy) + "|" + str(ll_unhealthy) + "]")
Пример #19
0
    def fit(self, X, Y):
        '''
        Create and train a separate hmm for each label in Y.

        Features are extracted from X with ``self.extract_features`` and
        saved to ``X_mfcc.npy``. The samples are then grouped by label and
        one GMMHMM per label is fitted on that label's concatenated samples.

        X: raw samples handed to ``self.extract_features``
        Y: one label per sample; ``int(label)`` is used as the index of the
           model, so labels are expected to be 0 .. n_class - 1

        Raises ArithmeticError when a fitted transition matrix contains NaN.
        '''

        self.n_class = len(np.unique(Y))

        print(' doing something ')

        X = self.extract_features(X)
        np.save('X_mfcc.npy', X)

        print(' * finished extracting features * ')

        # One model per class instead of a fixed number of four, so any
        # number of labels can be trained.
        self.hmm_set = [GMMHMM(n_components=self.n_states,
                               n_mix=self.n_mixtures,
                               verbose=True,
                               n_iter=100)
                        for _ in range(self.n_class)]

        print(self.hmm_set)

        class_data = [[] for _ in range(self.n_class)]
        lengths = [[] for _ in range(self.n_class)]

        print(' * preprocessing * ')

        for data, label in zip(X, Y):
            class_index = int(label)
            class_data[class_index].append(data)
            lengths[class_index].append(data.shape[0])

        print(' * finished preprocessing * ')

        for ci, model in enumerate(self.hmm_set):
            print('fitting {}'.format(ci))
            to_fit = np.concatenate(class_data[ci])

            print(to_fit[0].shape)
            print(to_fit.shape)

            model.fit(to_fit, lengths[ci])

            if np.any(model.covars_ <= 0):
                print('some covariances are 0. model might be a poor fit')
                # Force strictly positive covariances.
                model.covars_ = np.abs(model.covars_) + 1e-10

            # NaN never compares equal to anything (including itself), so
            # the check has to go through np.isnan.
            if np.any(np.isnan(model.transmat_)):
                raise ArithmeticError('transition probabilities are unndefined. '
                                      'Try reducing the number of states')
Пример #20
0
class GMMHMMTrainer(BaseTrainer):
    '''Thin wrapper around a GMMHMM.

    Attributes
    ----------
    _model: the parameters the trainer was created with
    gmmhmm: hmmlearn GMMHMM instance
    params_: parameters collected after fit
    train_data_: training data accumulated over fit calls
    '''
    def __init__(self, _model):
        """Create ``self.gmmhmm`` from the parameter dictionary ``_model``.

        Reads ``_model['hmmParams']`` (``transMat``, ``transMatPrior``,
        ``nComponent``, ``startProb``, ``startProbPrior``),
        ``_model['gmmParams']`` (``nMix``, ``covarianceType``, optional
        ``gmms``) and the optional ``_model['nIter']`` (default 50).
        """
        super(GMMHMMTrainer, self).__init__(_model)

        hmm_cfg = _model['hmmParams']
        gmm_cfg = _model['gmmParams']
        max_iter = _model.get('nIter', 50)

        trans = np.array(hmm_cfg['transMat'])
        trans_prior = np.array(hmm_cfg['transMatPrior'])
        n_states = hmm_cfg['nComponent']
        start = np.array(hmm_cfg['startProb'])
        start_prior = np.array(hmm_cfg['startProbPrior'])

        mix_count = gmm_cfg['nMix']
        cov_kind = gmm_cfg['covarianceType']
        stored = gmm_cfg.get('gmms', None)

        # Rebuild stored mixtures; a missing or empty list becomes ``None``.
        rebuilt = None
        if stored:
            rebuilt = []
            for entry in stored:
                mixture = GMM(covariance_type=entry['covarianceType'],
                              n_components=entry['nComponent'])
                mixture.covars_ = np.array(entry['covars'])
                mixture.means_ = np.array(entry['means'])
                mixture.weights_ = np.array(entry['weights'])
                rebuilt.append(mixture)

        self.gmmhmm = GMMHMM(
            n_components=n_states,
            n_mix=mix_count,
            gmms=rebuilt,
            n_iter=max_iter,
            covariance_type=cov_kind,
            transmat=trans,
            transmat_prior=trans_prior,
            startprob=start,
            startprob_prior=start_prior,
        )

    def __repr__(self):
        """Describe the trainer: initial model, fitted params, train data."""
        template = '<GMMHMMTrainer instance>\n\tinit_models:%s\n\tparams:%s\n\ttrain_data:%s'
        return template % (self._model, self.params_, self.train_data_)

    def fit(self, train_data):
        """Fit the wrapped model and record the resulting parameters.

        ``train_data`` is converted to an array, used to fit ``self.gmmhmm``
        and appended (as a list) to ``self.train_data_``. The fitted
        parameters are stored as plain lists and scalars in ``self.params_``.
        """
        observations = np.array(train_data)
        hmm = self.gmmhmm
        hmm.fit(observations)

        # One plain-dict description per fitted mixture.
        mixtures = [
            {
                'nComponent': mixture.n_components,
                'nIter': mixture.n_iter,
                'means': mixture.means_.tolist(),
                'covars': mixture.covars_.tolist(),
                'weights': mixture.weights_.tolist(),
                'covarianceType': mixture.covariance_type,
            }
            for mixture in hmm.gmms_
        ]
        self.train_data_ += observations.tolist()

        fitted = {'nIter': hmm.n_iter}
        fitted['hmmParams'] = {
            'nComponent': hmm.n_components,
            'transMat': hmm.transmat_.tolist(),
            'transMatPrior': hmm.transmat_prior.tolist(),
            'startProb': hmm.startprob_.tolist(),
            'startProbPrior': hmm.startprob_prior.tolist(),
        }
        fitted['gmmParams'] = {
            'nMix': hmm.n_mix,
            'covarianceType': hmm.covariance_type,
            'gmms': mixtures,
        }
        self.params_ = fitted
Пример #21
0
        class_vectors = dataset[cname]
        #     use Multinominal HMM
        #     dataset[cname] = list([kmeans.predict(v).reshape(-1,1) for v in dataset[cname]])
        #     hmm = hmmlearn.hmm.MultinomialHMM(
        #         n_components=20, random_state=0, n_iter=1000, verbose=True
        #     )

        hmm = GMMHMM(
            n_components=5,
            n_mix=1,
            random_state=42,
            n_iter=1000,
            verbose=True,
            params='mctw',
            init_params='mc',
            #         startprob_prior = np.array([1.0,0.0,0.0,0.0,0.0]),
            #         transmat_prior = np.array([
            #             [0.7,0.3,0.0,0.0,0.0],
            #             [0.0,0.7,0.3,0.0,0.0],
            #             [0.0,0.0,0.7,0.3,0.0],
            #             [0.0,0.0,0.0,0.7,0.3],
            #             [0.0,0.0,0.0,0.0,1.0],
            #         ])
        )
        hmm.startprob_ = np.array([1.0, 0.0, 0.0, 0.0, 0.0])
        #     hmm.transmat_ = np.array([
        #         [0.7,0.3,0.0,0.0,0.0],
        #         [0.0,0.7,0.3,0.0,0.0],
        #         [0.0,0.0,0.7,0.3,0.0],
        #         [0.0,0.0,0.0,0.7,0.3],
        #         [0.0,0.0,0.0,0.0,1.0],
Пример #22
0
def newtrain(speakers, name):
    """Record samples for a new speaker and train a GMM-HMM on them.

    A dataset directory is created for ``name``. If that fails, nothing is
    trained and an unchanged copy of ``speakers`` is returned. Otherwise
    ``samples`` is called with the directory, MFCC features are extracted
    from every file found there, a left-to-right GMM-HMM is fitted on them
    and the model is saved under the models folder as ``<name>.pkl``.

    Args:
        speakers: List of known speaker names. ``name`` is appended to it
            only when the dataset directory could be created.
        name: Name of the new speaker.

    Returns:
        ``speakers`` including ``name`` on success, or a copy of the
        original list when the dataset directory could not be created.
    """
    folder = "C:/Anaconda codes/Hackverse/servermodel/clientfiles/"
    known_speakers = list(speakers)

    try:
        os.makedirs("clientfiles/dataset/" + name)
    except OSError:
        print("already exists")
        return known_speakers

    # Only register the speaker once the directory exists, so the caller's
    # list is left untouched on the failure path above.
    speakers.append(name)

    dataset_dir = "clientfiles/dataset/" + name + "/"
    samples(dataset_dir)

    file_names = os.listdir(dataset_dir)

    lengths = []
    # Start from an empty (20, 0) block so the result keeps that shape when
    # the directory contains no files.
    mfcc_blocks = [np.empty([20, 0])]
    for file_name in file_names:
        signal, rate = librosa.load(dataset_dir + file_name)
        # Features are computed on the leading 1/1.25 of each signal.
        mfcc = librosa.feature.mfcc(y=signal[0:int(len(signal) / 1.25)],
                                    sr=rate)
        lengths.append(mfcc.shape[1])
        mfcc_blocks.append(mfcc)

    # One concatenation at the end; rows are frames after the transpose.
    feature = np.concatenate(mfcc_blocks, axis=1).transpose()

    n_states = 3  # number of HMM states
    n_mix = 64  # number of Gaussian mixtures per state

    # Left-to-right model: start in state 0 and only move to the same or a
    # later state. Tiny non-zero values replace the exact zeros.
    startprob = np.ones(n_states) * (10**(-30))
    startprob[0] = 1.0 - (n_states - 1) * (10**(-30))
    transmat = np.zeros([n_states, n_states])
    for i in range(n_states):
        transmat[i, i:] = 1.0 / (n_states - i)
    transmat[transmat == 0] = (10**(-30))

    # 's' and 't' are left out of init_params so that fit() keeps the start
    # and transition probabilities assigned below.
    model = GMMHMM(n_components=n_states,
                   n_mix=n_mix,
                   covariance_type='diag',
                   init_params="mcw",
                   n_iter=100)
    model.startprob_ = startprob
    model.transmat_ = transmat

    model.fit(feature, lengths)

    joblib.dump(model, folder + "/models/" + name + ".pkl")
    return speakers
Пример #23
0
class StockPredictor(object):
    """Score candidate outcomes for one ticker with a GMM-HMM.

    On construction the quote history for ``ticker`` is read from the
    ``market`` Elasticsearch index and a grid of candidate
    ``(frac_change, frac_high, frac_low)`` outcomes is built.  ``fit`` trains
    the HMM on that history; ``predict_outcomes`` scores every candidate
    appended to the last ``n_latency_days`` rows and bulk-indexes the result.
    """

    def __init__(self,
                 ticker,
                 chunks=9,
                 delta=0,
                 n_hidden_states=5,
                 n_latency_days=10,
                 n_steps_frac_change=10,
                 n_steps_frac_high=30,
                 n_steps_frac_low=10,
                 n_iter=100,
                 verbose=False,
                 prediction_date=None):
        """Store the settings, load the training data and build the grid.

        ``chunks`` and ``delta`` are copied into every exported outcome row;
        ``delta`` additionally offsets the default prediction date.
        ``prediction_date`` is a ``'%Y-%m-%dT%H:%M:%S'`` string, or ``None``
        to derive it from the last training row.
        """
        self.total_score = 0
        self.verbose = verbose
        self.ticker = ticker
        self.n_latency_days = n_latency_days
        self.hmm = GMMHMM(n_components=n_hidden_states, n_iter=n_iter)
        self.chunks = chunks
        self.delta = delta
        self.prediction_date = prediction_date
        self.fetch_training_data()
        self._compute_all_possible_outcomes(n_steps_frac_change,
                                            n_steps_frac_high,
                                            n_steps_frac_low)

    def fetch_training_data(self):
        """Load this ticker's quotes from Elasticsearch into a DataFrame.

        Sets ``training_data`` and ``chunked_training_data`` (the same frame)
        and, when no prediction date was supplied, sets it to ``delta + 1``
        days after the timestamp of the last row.
        """
        print("Fetching training data ...")
        res = es.search(index="market",
                        doc_type="quote",
                        size=10000,
                        body={"query": {
                            "match": {
                                "ticker": self.ticker
                            }
                        }})
        self.training_data = json_normalize(res['hits']['hits'])
        # NOTE: an earlier experiment re-cut the history into `chunks`-sized
        # windows plus a row `delta` positions later; it is disabled, so the
        # "chunked" frame is just the full history.
        self.chunked_training_data = self.training_data

        last_row = self.chunked_training_data.tail(1)
        if self.verbose:
            print("Latest record for training:\n%s" % last_row)
        latest_date = last_row['_source.timestamp']
        datetime_object = datetime.datetime.strptime(latest_date.values[0],
                                                     '%Y-%m-%dT%H:%M:%S')

        if self.prediction_date is None:
            prediction_date = datetime_object + timedelta(days=self.delta + 1)
            self.prediction_date = datetime.datetime.strftime(
                prediction_date, '%Y-%m-%dT%H:%M:%S')

    @staticmethod
    def _extract_features(data):
        """Return an (n_rows, 3) array of change / change_high / change_low."""
        frac_change = np.array(
            data['_source.change'])  #(close_price - open_price) / open_price
        frac_high = np.array(data['_source.change_high']
                             )  #(high_price - open_price) / open_price
        frac_low = np.array(
            data['_source.change_low'])  #(open_price - low_price) / open_price

        return np.column_stack((frac_change, frac_high, frac_low))

    def fit(self):
        """Train the HMM on the features of the loaded history."""
        print('Extracting Features')
        feature_vector = StockPredictor._extract_features(
            self.chunked_training_data)
        if self.verbose:
            print("feature vector %s" % feature_vector)
        print('Training Model with %s features' % feature_vector.size)
        print(
            "Latest date to be used in training is %s" %
            self.chunked_training_data.tail(1)['_source.timestamp'].values[0])
        self.hmm.fit(feature_vector)
        print('Model trained')

    def _compute_all_possible_outcomes(self, n_steps_frac_change,
                                       n_steps_frac_high, n_steps_frac_low):
        """Build ``all_possible_outcomes``: the Cartesian grid of candidates.

        The grid spans frac_change in [-0.1, 0.1] and frac_high / frac_low in
        [0, 0.05], each sampled with the given number of evenly spaced steps.
        """
        frac_change_range = np.linspace(-0.1, 0.1, n_steps_frac_change)
        frac_high_range = np.linspace(0, 0.05, n_steps_frac_high)
        frac_low_range = np.linspace(0, 0.05, n_steps_frac_low)

        self.all_possible_outcomes = np.array(
            list(
                itertools.product(frac_change_range, frac_high_range,
                                  frac_low_range)))

    def json_data_for_trade(self):
        """Return the two bulk-API lines (action, document) for the trade row.

        The document carries the running ``total_score``; the document id is
        built from ticker, prediction date and that score.
        """
        total_score = self.total_score
        doc_id = "%s-%s-%s" % (self.ticker, self.prediction_date, total_score)

        meta = {
            "index": {
                "_index": TRADE_INDEX_NAME,
                "_type": TRADE_TYPE_NAME,
                "_id": doc_id
            }
        }

        row = ObjDict()
        row.total_score = total_score
        row.timestamp = self.prediction_date
        row.ticker = self.ticker

        return [json.dumps(meta), json.dumps(row)]

    def json_data_for_outcome(self, outcome, score):
        """Return the two bulk-API lines (action, document) for one outcome.

        ``outcome`` is a ``(frac_change, frac_high, frac_low)`` triple and
        ``score`` the value the HMM assigned to it.  The predicted close is
        the open price of the last training row scaled by ``1 + frac_change``.
        """
        doc_id = "%s-%s-%s" % (self.ticker, self.prediction_date, outcome)

        meta = {
            "index": {
                "_index": INDEX_NAME,
                "_type": TYPE_NAME,
                "_id": doc_id
            }
        }

        open_price = self.training_data.tail(1)['_source.open'].values[0]

        row = ObjDict()
        row.frac_change = outcome[0]
        row.frac_high_range = outcome[1]
        row.frac_low_range = outcome[2]
        row.predicted_close = open_price * (1 + outcome[0])
        row.expected_value = outcome[0] * score
        row.timestamp = self.prediction_date
        row.score = score
        row.chunks = self.chunks
        row.delta = self.delta
        row.ticker = self.ticker

        return [json.dumps(meta), json.dumps(row)]

    def delete_prediction_data(self, ticker):
        """Delete the stored predictions of this predictor's ticker.

        NOTE: the ``ticker`` argument is accepted for compatibility but not
        used; the query always matches ``self.ticker``.
        """
        print("Deleting prediction data for ... %s" % self.ticker)
        es.delete_by_query(index=INDEX_NAME,
                           doc_type=TYPE_NAME,
                           body={'query': {
                               'match': {
                                   'ticker': self.ticker
                               }
                           }})

    def predict_outcomes(self):
        """Score every candidate outcome and export the positive ones.

        Each candidate is appended to the features of the last
        ``n_latency_days`` rows and scored by the HMM.  Candidates with a
        score above zero are exported; their score is added to
        ``total_score`` for a positive frac_change and subtracted for a
        negative one.
        """
        print("predicting outcomes for: %s" % self.prediction_date)
        previous_testing_data = self.training_data.tail(
            self.n_latency_days).index

        if self.verbose:
            print("previous_testing_data %s" % previous_testing_data)

        test_data = self.training_data.iloc[previous_testing_data]

        if self.verbose:
            print("Using the following slice of data:")
            print("[%s]" % previous_testing_data)
            print(test_data)

        test_data_features = StockPredictor._extract_features(test_data)

        bulk_data = []
        trade_data = []

        for possible_outcome in self.all_possible_outcomes:
            # np.vstack is the supported spelling of the deprecated
            # np.row_stack alias.
            test_feature_vectors = np.vstack(
                (test_data_features, possible_outcome))
            score = self.hmm.score(test_feature_vectors)

            # ignoring scores <= 0
            if score > 0:
                bulk_data.append(
                    self.json_data_for_outcome(possible_outcome, score))

                if possible_outcome[0] > 0:
                    self.total_score = self.total_score + score
                if possible_outcome[0] < 0:
                    self.total_score = self.total_score - score
                trade_data.append(self.json_data_for_trade())

        print("Exporting predictions to ES")

        # Nothing is sent when no candidate scored above zero, so no bulk
        # request is issued with an empty body.
        if bulk_data:
            es.bulk(index=INDEX_NAME,
                    body=self.format_data_for_es(bulk_data),
                    refresh=True)
        if trade_data:
            es.bulk(index=TRADE_INDEX_NAME,
                    body=self.format_data_for_es(trade_data),
                    refresh=True)

    def format_data_for_es(self, data):
        """Join ``(action, document)`` pairs into a newline-delimited body."""
        return "".join(row[0] + "\n" + row[1] + "\n" for row in data)
Пример #24
0
    lengths[i] = int(len(x.transpose()))

    print(np.shape(x))

    feature_vectors = np.concatenate((feature_vectors, x), axis=1)
    #feature_vectors = np.vstack((feature_vectors, x.transpose()))

print(lengths)
print(feature_vectors.shape)

# ---- Model set-up ----

N = 3  # hidden states in the HMM
Mixtures = 64  # Gaussian mixture components

model = GMMHMM(n_components=N, n_mix=Mixtures, covariance_type='diag')

# Left-to-right start distribution: almost all of the mass sits on state 0,
# the remaining states get a tiny non-zero value.
startprob = np.full(N, 10**(-30))
startprob[0] = 1.0 - (N - 1) * (10**(-30))

# Left-to-right transition matrix: row i shares its mass equally between
# state i and the states after it; entries below the diagonal are replaced
# by a tiny non-zero value.
transmat = np.zeros([N, N])
print(startprob, '\n', transmat)
for i in range(N):
    transmat[i, :] = 1 / (N - i)
transmat = np.triu(transmat)
transmat[transmat == 0] = (10**(-30))

model = GMMHMM(n_components=N,
               n_mix=Mixtures,
               covariance_type='diag',
               init_params="mcw",
def trainingGMMHMM(
        dataset,  # training dataset.
        n_c,  # number of hmm's components (ie. hidden states)
        n_m,  # number of gmm's mixtures (ie. Gaussian model)
        start_prob_prior=None,  # prior of start hidden states probabilities.
        trans_mat_prior=None,  # prior of transition matrix.
        start_prob=None,  # the start hidden states probabilities.
        trans_mat=None,  # the transition matrix.
        gmms=None,  # models' params of gmm
        covar_type='full',
        n_i=50
):
    """Fit a GMMHMM on ``dataset`` and return its parameters as plain data.

    ``dataset`` must provide ``getDataset()``, whose result is passed to
    ``GMMHMM.fit``.  ``gmms``, when given, is an iterable of dicts with
    ``"covars"``, ``"means"`` and ``"weights"`` used to seed the emission
    GMMs.  Optional priors / initial values left as ``None`` are forwarded
    as ``None`` so the model's own defaults apply.

    Returns a dict with the HMM start probabilities and transition matrix
    plus one ``{"covars", "means", "weights"}`` entry per GMM in
    ``model.gmms_``.
    """

    def _optional_array(value):
        # np.array(None) would produce a 0-d object array, which is not the
        # same as "not provided"; keep None as None.
        return None if value is None else np.array(value)

    X = dataset.getDataset()

    # Seed GMMs, or None to let GMMHMM create its own.
    if gmms is None:
        initial_gmms = None
    else:
        initial_gmms = []
        for gmm in gmms:
            seed = GMM(n_components=n_m, covariance_type=covar_type)
            seed.covars_ = np.array(gmm["covars"])
            seed.means_ = np.array(gmm["means"])
            seed.weights_ = np.array(gmm["weights"])
            initial_gmms.append(seed)

    model = GMMHMM(
        startprob_prior=_optional_array(start_prob_prior),
        transmat_prior=_optional_array(trans_mat_prior),
        startprob=_optional_array(start_prob),
        transmat=_optional_array(trans_mat),
        gmms=initial_gmms,
        n_components=n_c,
        n_mix=n_m,
        covariance_type=covar_type,
        n_iter=n_i
    )
    model.fit(X)

    # Export every fitted emission GMM.  Iterating the list itself (instead
    # of range(n_m)) avoids skipping entries or running past the end when
    # the number of mixtures differs from the number of GMMs.
    gmm_params = [
        {
            "covars": fitted.covars_.tolist(),
            "means": fitted.means_.tolist(),
            "weights": fitted.weights_.tolist()
        }
        for fitted in model.gmms_
    ]

    return {
        "nComponent": n_c,
        "nMix": n_m,
        "covarianceType": covar_type,
        "hmmParams": {
            "startProb": model.startprob_.tolist(),
            "transMat": model.transmat_.tolist()
        },
        "gmmParams": {
            "nMix": n_m,
            "covarianceType": covar_type,
            "params": gmm_params
        }
    }
Пример #26
0
def _read_sequences(path, seq_len=10):
    """Read a comma-separated feature file as fixed-length sequences.

    Every non-blank line is parsed into a list of floats and each run of
    ``seq_len`` consecutive lines becomes one 2-D numpy array.  A trailing
    run shorter than ``seq_len`` is discarded.
    """
    sequences = []
    frames = []
    with open(path, "r") as handle:
        for line in handle:
            line = line.strip()
            if not line:
                continue
            frames.append([float(value) for value in line.split(",")])
            if len(frames) == seq_len:
                sequences.append(numpy.array(frames))
                frames = []
    return sequences


def _train_from_directory(model, directory):
    """Fit ``model`` on the sequences of every file listed for ``directory``."""
    sequences = []
    for path in get_files_list(directory):
        sequences.extend(_read_sequences(path))
    if not sequences:
        raise ValueError("no training sequences found in " + str(directory))
    model.fit(sequences)


def _best_label(sequence, labelled_models):
    """Return the label of the model that scores ``sequence`` highest.

    On a tie the earliest entry of ``labelled_models`` wins.
    """
    scored = [(model.score(sequence), label)
              for model, label in labelled_models]
    return max(scored, key=lambda pair: pair[0])[1]


def main():
    """Train the HMMs and write a per-file report to testcase_output.txt.

    Seven models are trained from ``./training_files/<name>``: multi/single,
    shouting/not-shouting and the three speakers.  Every 10-line sequence of
    each file under ``./testing_files`` is then labelled by the best-scoring
    model of each group, and for the sequences labelled "single" the report
    counts how often each speaker was shouting or not shouting.
    """
    # Factor used by the report to turn a sequence count into seconds.
    # NOTE(review): presumably the duration of one 10-line sequence - confirm.
    seconds_per_sequence = 0.05

    multi = GMMHMM(5, 2)
    arnab = GMMHMM(5, 2)
    kejriwal = GMMHMM(5, 2)
    ravish = GMMHMM(5, 2)
    notshouting = GMMHMM(5, 2)
    shouting = GMMHMM(5, 2)
    single = GMMHMM(5, 2)

    # Each model is trained on ALL files of its directory (previously the
    # sequence list was reset per file, so only the last file was used).
    training_plan = [
        (multi, r'./training_files/multi'),
        (arnab, r'./training_files/arnab'),
        (kejriwal, r'./training_files/kejriwal'),
        (ravish, r'./training_files/ravish'),
        (notshouting, r'./training_files/not-shouting'),
        (shouting, r'./training_files/shouting'),
        (single, r'./training_files/single'),
    ]
    for model, directory in training_plan:
        _train_from_directory(model, directory)

    loudness_models = [(shouting, "shouting"), (notshouting, "notshouting")]
    speaker_models = [(arnab, "arnab"), (kejriwal, "kejriwal"),
                      (ravish, "ravish")]
    crowd_models = [(multi, "multi"), (single, "single")]

    with open("testcase_output.txt", "w") as out:
        for ad in get_files_list(r'./testing_files'):
            obs = _read_sequences(ad)

            p_choosen = [_best_label(seq, loudness_models) for seq in obs]
            p1_choosen = [_best_label(seq, speaker_models) for seq in obs]
            p2_choosen = [_best_label(seq, crowd_models) for seq in obs]

            totaltime = len(p_choosen) * seconds_per_sequence

            single_count = 0
            d_shouting = {"arnab": 0, "kejriwal": 0, "ravish": 0}
            d_notshouting = {"arnab": 0, "kejriwal": 0, "ravish": 0}

            # Only sequences attributed to a single speaker are counted.
            for loudness, speaker, crowd in zip(p_choosen, p1_choosen,
                                                p2_choosen):
                if crowd != "single":
                    continue
                single_count += 1
                if loudness == "shouting":
                    d_shouting[speaker] += 1
                elif loudness == "notshouting":
                    d_notshouting[speaker] += 1

            shouting_total = sum(d_shouting.values())
            notshouting_total = sum(d_notshouting.values())

            def percent_of_total(count):
                # A file with no complete sequence has zero total time.
                if not totaltime:
                    return 0.0
                return ((count * seconds_per_sequence) / totaltime) * 100

            out.write("\n*******--> " + str(ad) + "  <--*******\n")
            out.write("\nChecking single HMM and multi HMM:\n")
            out.write("Number of instance of Single: " + str(single_count) +
                      "\n")
            out.write(
                "\nChecking shouting and non-shouting HMM for all Single instances:\n"
            )
            out.write("Number of instance of Shouting: " +
                      str(shouting_total) + "\n")
            out.write("Number of instance of Not-shouting: " +
                      str(notshouting_total) + "\n")
            out.write(
                "\nChecking the frequency of each speaker in both both shouting and not shouting instance...\n"
            )
            out.write("Shouting instance: \n" + str(d_shouting) + "\n")
            out.write("Not-Shouting instance: \n" + str(d_notshouting) + "\n")

            out.write("\nResult:\n")

            for speaker, count in d_shouting.items():
                out.write(
                    str(speaker) + " was shouting for " +
                    str(count * seconds_per_sequence) + " sec.\n")

            out.write("\n")

            for speaker, count in d_notshouting.items():
                out.write(
                    str(speaker) + " was not shouting for " +
                    str(count * seconds_per_sequence) + " sec.\n")

            out.write("\n")

            for speaker, count in d_shouting.items():
                out.write(
                    str(speaker) + " was shouting for " +
                    str(percent_of_total(count)) + " % of time.\n")

            out.write("\n")

            # This value is a percentage, so it is labelled as one (the
            # line used to end in "sec.").
            for speaker, count in d_notshouting.items():
                out.write(
                    str(speaker) + " was not shouting for " +
                    str(percent_of_total(count)) + " % of time.\n")

            out.write("\n")

            print(d_shouting)
            print(d_notshouting)
Пример #27
0
def getTMHS(X):
    """Fit a 3-state diagonal-covariance GMMHMM on ``X``.

    Returns a ``(hidden_states, transmat)`` pair: the state sequence the
    fitted model predicts for ``X`` and the fitted transition matrix.
    """
    estimator = GMMHMM(n_components=3, covariance_type="diag", n_iter=1000)
    fitted = estimator.fit(X)
    return fitted.predict(X), fitted.transmat_
Пример #28
0
import os
from hmmlearn.hmm import GMMHMM
from python_speech_features import mfcc
from scipy.io import wavfile
from sklearn.model_selection import train_test_split
import numpy as np
import sys

input_folder = '/home/sachin/Downloads/cmu_us_awb_arctic-0.95-release/cmu_us_awb_arctic/wav'
hmm_models = []

# Extract the MFCC matrix of every file in the folder.  The matrices are
# collected in a list and joined once at the end; appending to X inside the
# loop would copy the whole accumulated matrix for each file.
feature_blocks = []
for filename in os.listdir(input_folder):
    filepath = os.path.join(input_folder, filename)
    sampling_freq, audio = wavfile.read(filepath)
    mfcc_features = mfcc(audio, sampling_freq)
    feature_blocks.append(mfcc_features)

# Rows are frames from all files stacked in listing order; an empty folder
# keeps the empty array the script started with.
X = np.concatenate(feature_blocks, axis=0) if feature_blocks else np.array([])


model = GMMHMM(n_components=3, n_mix=45, n_iter=100)
# Hold out 30% of the frames for scoring the fitted model.
X_train, X_test = train_test_split(X, train_size=0.7)
hmm_models.append(model.fit(X_train))

print(model.score(X_test))
Пример #29
0
def test(fil):
    """Classify every sample of each file and write the result next to it.

    For each path in ``fil`` the file is pre-processed on its own, every
    resulting sample is scored by the first two models of the module-level
    ``gmm`` list, and the list of winning model indices (0 or 1) is written
    as text to ``<path>.out2``.
    """
    for f in fil:
        dat = pre_process([f])
        li = []
        for sample in dat:
            scores = [gmm[j].score(sample) for j in range(2)]
            # index() returns the first maximum, so model 0 wins ties.
            li.append(scores.index(max(scores)))
        # The context manager closes (and flushes) the output file; it was
        # previously left open.
        with open(f + '.out2', 'w') as w:
            w.write(str(li))


if __name__ == '__main__':

    gmm = [
        GMMHMM(n_components=3, n_mix=1, covariance_type='diag')
        for i in range(2)
    ]

    path = "./mfcc_files/"
    data_path = [
        path + 'single/single_1_mfcc.txt', path + 'single/single_3_mfcc.txt',
        path + 'single/single_5_mfcc.txt', path + 'single/single_6_mfcc.txt'
    ]

    dat = pre_process(data_path)
    gmm[0].fit(dat)

    data_path = [
        path + 'multi/MULTI_1_mfcc.txt', path + 'multi/MULTI_2_mfcc.txt',
        path + 'multi/MULTI_3_mfcc.txt', path + 'multi/MULTI_4_mfcc.txt'
class GMMHMMTrainer(BaseTrainer):
    '''Trainer that builds a GMMHMM from a parameter dict and fits it.

    Attributes
    ----------
    _model: the init params handed to the constructor
    gmmhmm: the wrapped GMMHMM instance
    params_: parameters exported after ``fit``
    train_data_: training data accumulated over ``fit`` calls
    '''

    def __init__(self, _model):
        '''Create the GMMHMM described by the ``_model`` dict.

        ``_model`` needs ``hmmParams`` (transMat, transMatPrior, nComponent,
        startProb, startProbPrior) and ``gmmParams`` (nMix, covarianceType,
        optional ``gmms`` seeds); ``nIter`` defaults to 50.
        '''
        super(GMMHMMTrainer, self).__init__(_model)

        hmm_cfg = _model['hmmParams']
        gmm_cfg = _model['gmmParams']
        iterations = _model.get('nIter', 50)

        trans = np.array(hmm_cfg['transMat'])
        trans_prior = np.array(hmm_cfg['transMatPrior'])
        states = hmm_cfg['nComponent']
        start = np.array(hmm_cfg['startProb'])
        start_prior = np.array(hmm_cfg['startProbPrior'])

        mixtures = gmm_cfg['nMix']
        cov_type = gmm_cfg['covarianceType']
        seeds = gmm_cfg.get('gmms', None)

        # A missing or empty seed list means "let GMMHMM build its own GMMs".
        seed_objects = None
        if seeds:
            seed_objects = []
            for spec in seeds:
                seed = GMM(n_components=spec['nComponent'],
                           covariance_type=spec['covarianceType'])
                seed.covars_ = np.array(spec['covars'])
                seed.means_ = np.array(spec['means'])
                seed.weights_ = np.array(spec['weights'])
                seed_objects.append(seed)

        self.gmmhmm = GMMHMM(
            n_components=states,
            n_mix=mixtures,
            gmms=seed_objects,
            n_iter=iterations,
            covariance_type=cov_type,
            transmat=trans,
            transmat_prior=trans_prior,
            startprob=start,
            startprob_prior=start_prior,
        )

    def __repr__(self):
        shown = (self._model, self.params_, self.train_data_)
        return '<GMMHMMTrainer instance>\n\tinit_models:%s\n\tparams:%s\n\ttrain_data:%s' % shown

    def fit(self, train_data):
        '''Fit the wrapped model and export its parameters to ``params_``.

        ``train_data`` is converted to an array, used for fitting and then
        appended (as lists) to ``train_data_``.
        '''
        samples = np.array(train_data)
        hmm = self.gmmhmm
        hmm.fit(samples)

        # One plain-data record per fitted emission GMM.
        exported_gmms = [
            {
                'nComponent': fitted.n_components,
                'nIter': fitted.n_iter,
                'means': fitted.means_.tolist(),
                'covars': fitted.covars_.tolist(),
                'weights': fitted.weights_.tolist(),
                'covarianceType': fitted.covariance_type,
            }
            for fitted in hmm.gmms_
        ]

        self.train_data_ += samples.tolist()

        hmm_section = {
            'nComponent': hmm.n_components,
            'transMat': hmm.transmat_.tolist(),
            'transMatPrior': hmm.transmat_prior.tolist(),
            'startProb': hmm.startprob_.tolist(),
            'startProbPrior': hmm.startprob_prior.tolist(),
        }
        gmm_section = {
            'nMix': hmm.n_mix,
            'covarianceType': hmm.covariance_type,
            'gmms': exported_gmms,
        }
        self.params_ = {
            'nIter': hmm.n_iter,
            'hmmParams': hmm_section,
            'gmmParams': gmm_section,
        }
Пример #31
0
# Initial transition matrix between the four components.  Every entry is
# non-zero, so a transition between any pair of components is allowed.
trans_mat = np.array([
    [0.5, 0.2, 0.2, 0.1],
    [0.3, 0.4, 0.2, 0.1],
    [0.1, 0.2, 0.5, 0.2],
    [0.2, 0.1, 0.1, 0.6],
])

# Prior over the start probabilities.
start_prob_prior = np.array([0.3, 0.3, 0.3, 0.1])

# Prior over the transition matrix.
trans_mat_prior = np.array([
    [0.2, 0.1, 0.3, 0.4],
    [0.3, 0.2, 0.2, 0.3],
    [0.1, 0.1, 0.1, 0.7],
    [0.1, 0.3, 0.4, 0.2],
])

# Settings shared by all four models; they differ only in the number of
# mixture components.
_shared_hmm_args = dict(
    startprob_prior=start_prob_prior,
    transmat_prior=trans_mat_prior,
    startprob=start_prob,
    transmat=trans_mat,
    n_components=4,
    covariance_type='spherical',
    n_iter=50,
)
model_dining = GMMHMM(n_mix=4, **_shared_hmm_args)
model_fitness = GMMHMM(n_mix=10, **_shared_hmm_args)
model_work = GMMHMM(n_mix=8, **_shared_hmm_args)
model_shop = GMMHMM(n_mix=4, **_shared_hmm_args)

# Debug helpers for inspecting the first emission GMM:
# print model_dining.gmms_[0].covars_.tolist()
# print model_dining.gmms_[0].means_.tolist()
# print model_dining.gmms_[0].weights_.tolist()

# One dataset object per activity.
dataset_dining = Dataset()
dataset_fitness = Dataset()
dataset_work = Dataset()
dataset_shop = Dataset()

# print Dataset().randomObservations('dining_out_in_chinese_restaurant', 10, 10).obs
Пример #32
0
def trainingGMMHMM(
        dataset,  # training dataset.
        n_c,  # number of hmm's components (ie. hidden states)
        n_m,  # number of gmm's mixtures (ie. Gaussian model)
        start_prob_prior=None,  # prior of start hidden states probabilities.
        trans_mat_prior=None,  # prior of transition matrix.
        start_prob=None,  # the start hidden states probabilities.
        trans_mat=None,  # the transition matrix.
        gmms=None,  # models' params of gmm
        covar_type='full',
        n_i=50):
    """Fit a GMMHMM on ``dataset`` and return its parameters as plain data.

    ``dataset`` must provide ``getDataset()``, whose result is passed to
    ``GMMHMM.fit``.  ``gmms``, when given, is an iterable of dicts with
    ``"covars"``, ``"means"`` and ``"weights"`` used to seed the emission
    GMMs.  Optional priors / initial values left as ``None`` are forwarded
    as ``None`` so the model's own defaults apply.

    Returns a dict with the HMM start probabilities and transition matrix
    plus one ``{"covars", "means", "weights"}`` entry per GMM in
    ``model.gmms_``.
    """

    def _optional_array(value):
        # np.array(None) would produce a 0-d object array, which is not the
        # same as "not provided"; keep None as None.
        return None if value is None else np.array(value)

    X = dataset.getDataset()

    # Seed GMMs, or None to let GMMHMM create its own.
    if gmms is None:
        initial_gmms = None
    else:
        initial_gmms = []
        for gmm in gmms:
            seed = GMM(n_components=n_m, covariance_type=covar_type)
            seed.covars_ = np.array(gmm["covars"])
            seed.means_ = np.array(gmm["means"])
            seed.weights_ = np.array(gmm["weights"])
            initial_gmms.append(seed)

    model = GMMHMM(startprob_prior=_optional_array(start_prob_prior),
                   transmat_prior=_optional_array(trans_mat_prior),
                   startprob=_optional_array(start_prob),
                   transmat=_optional_array(trans_mat),
                   gmms=initial_gmms,
                   n_components=n_c,
                   n_mix=n_m,
                   covariance_type=covar_type,
                   n_iter=n_i)
    model.fit(X)

    # Export every fitted emission GMM.  Iterating the list itself (instead
    # of range(n_m)) avoids skipping entries or running past the end when
    # the number of mixtures differs from the number of GMMs.
    gmm_params = [
        {
            "covars": fitted.covars_.tolist(),
            "means": fitted.means_.tolist(),
            "weights": fitted.weights_.tolist()
        }
        for fitted in model.gmms_
    ]

    return {
        "nComponent": n_c,
        "nMix": n_m,
        "covarianceType": covar_type,
        "hmmParams": {
            "startProb": model.startprob_.tolist(),
            "transMat": model.transmat_.tolist()
        },
        "gmmParams": {
            "nMix": n_m,
            "covarianceType": covar_type,
            "params": gmm_params
        }
    }
Пример #33
0
    # prediction sample for every entry of test set
    prediction = np.zeros(len(X))

    for i in range(len(X)):
        prediction[i] = np.argmax(
            [model.score(X[i]) for key, model in models.items()])

    test_confusion_matrix = sklearn.metrics.confusion_matrix(
        y, prediction, labels=range(n_classes))
    test_accuracy = np.sum(
        np.diagonal(test_confusion_matrix)) / np.sum(test_confusion_matrix)

    print("Test Accuracy: ", test_accuracy)
    print("Test Confusion Matrix:")
    print(test_confusion_matrix)


if __name__ == '__main__':
    # Load the train/test split (with 4 augmentation iterations).
    X_train, X_test, y_train, y_test = prepare_data(augment_iter=4)

    # One GMMHMM per class, keyed by the class index.
    # Test Accuracy:  0.7905
    models = {
        label: GMMHMM(n_components=8, n_mix=3)
        for label in range(n_classes)
    }

    # Fit the per-class models, then report accuracy on the held-out set.
    train(models, X_train, y_train)
    test(models, X_test, y_test)
# Observations whose label is '4', '5' or '6', in their original order.
obs4 = [pic for idx, pic in enumerate(newPicObservations) if labels[idx] == '4']
obs5 = [pic for idx, pic in enumerate(newPicObservations) if labels[idx] == '5']
obs6 = [pic for idx, pic in enumerate(newPicObservations) if labels[idx] == '6']

#newPicObservations.shape = (48, 48)

# Settings shared by every Gaussian and Gaussian-mixture model below.
_hmm_args = dict(n_components=48, covariance_type='full', n_iter=100)

# One GaussianHMM per label.
model0Gau = GaussianHMM(**_hmm_args).fit(obs0)
model1Gau = GaussianHMM(**_hmm_args).fit(obs1)
model2Gau = GaussianHMM(**_hmm_args).fit(obs2)
model3Gau = GaussianHMM(**_hmm_args).fit(obs3)
model4Gau = GaussianHMM(**_hmm_args).fit(obs4)
model5Gau = GaussianHMM(**_hmm_args).fit(obs5)
model6Gau = GaussianHMM(**_hmm_args).fit(obs6)

# One GMMHMM per label; a ValueError stops the remaining fits and is only
# reported.
try:
    model0GauMix = GMMHMM(**_hmm_args).fit(obs0)
    model1GauMix = GMMHMM(**_hmm_args).fit(obs1)
    model2GauMix = GMMHMM(**_hmm_args).fit(obs2)
    model3GauMix = GMMHMM(**_hmm_args).fit(obs3)
    model4GauMix = GMMHMM(**_hmm_args).fit(obs4)
    model5GauMix = GMMHMM(**_hmm_args).fit(obs5)
    model6GauMix = GMMHMM(**_hmm_args).fit(obs6)
except ValueError:
    print('err gaumix')

try:
    model0Multi = MultinomialHMM(n_components=48, n_iter=100).fit(obs0)
    model1Multi = MultinomialHMM(n_components=48, n_iter=100).fit(obs1)
    model2Multi = MultinomialHMM(n_components=48, n_iter=100).fit(obs2)
    model3Multi = MultinomialHMM(n_components=48, n_iter=100).fit(obs3)
    model4Multi = MultinomialHMM(n_components=48, n_iter=100).fit(obs4)