示例#1
0
def seqHMM():
    """Fit a MultinomialHMM on the module-level data and print accuracy.

    Relies on globals defined elsewhere in the file:
      - input_data: DataFrame whose last column is the label
      - lengths: per-sequence lengths passed to fit()
    """
    clf = MultinomialHMM()
    # All columns but the last are features; the last column is the label.
    clf.fit(input_data.iloc[:, :-1], input_data.iloc[:, -1], lengths)
    pred = clf.predict(input_data.iloc[:, :-1])
    actual = input_data.iloc[:, -1]
    # Training-set accuracy (no held-out split here).
    accuracy = sum(pred == actual) / float(len(actual))
    print(accuracy)  # fixed: Python 2 `print` statement -> print() call
示例#2
0
def fit_validate_hmm(df: DataFrame,
                     y_col: str,
                     seq_id_col: str,
                     feature_cols: List[str],
                     k: int = 10,
                     alpha: float = 0.01) -> dict:
    """Cross-validate and fit a MultinomialHMM on a token-level DataFrame.

    Parameters
    ----------
    df : DataFrame
        One row per token; rows belonging to the same sequence share a
        value in ``seq_id_col``.
    y_col : str
        Name of the label column.
    seq_id_col : str
        Column identifying which sequence each token belongs to.
    feature_cols : List[str]
        Columns one-hot encoded as HMM emission features.
    k : int
        Number of cross-validation folds.
    alpha : float
        HMM smoothing parameter (fixed: was annotated ``int`` although
        the default 0.01 is a float).

    Returns
    -------
    dict
        ``{'model': fitted MultinomialHMM, 'k_scores': CV scores}``.
    """
    df_tokens = df.copy()

    # Add one-hot vectors encoded from the requested feature columns
    # (helper mutates df_tokens in place).
    add_one_hot_vectors(df_tokens, feature_cols)

    # Per-sequence lengths, required by the sequence-aware fit().
    df_lengths = get_sequence_lengths(df_tokens, seq_id_col)

    model = MultinomialHMM(alpha=alpha)

    # k-fold scores computed before the final full-data fit.
    k_scores = get_validated_scores(model, df_tokens, y_col, seq_id_col,
                                    df_lengths, k)

    # Fit the model on the entire dataset.
    X = np.vstack(df_tokens.one_hot_vector.values)
    y = df_tokens[y_col]
    seq_lengths = df_lengths.length  # renamed from `l` (ambiguous name, E741)

    model.fit(X, y, seq_lengths)

    return {'model': model, 'k_scores': k_scores}
示例#3
0
def trainHMM(data):
    """Train a MultinomialHMM on a CoNLL-formatted dataset.

    `data` is whatever load_conll accepts (path or handle); `features`
    is the module-level feature-extraction callable it expects.
    Returns the fitted classifier.
    """
    # Bug fix: the feature-extraction line was commented out, leaving
    # X_train / y_train / lengths_train undefined (NameError on every call).
    X_train, y_train, lengths_train = load_conll(data, features)

    # Model the labelled sequences as an HMM.
    clf = MultinomialHMM()
    clf.fit(X_train, y_train, lengths_train)
    return clf
示例#4
0
def hmm_pred(a, X_train, X_test, y_train, y_test):
    """Hidden Markov model: fit with smoothing `a`, treating every
    training sample as its own length-1 sequence, then return
    [accuracy, precision, recall, F1] (weighted averages) on the test
    split."""
    model = MultinomialHMM(alpha=a)
    # One sequence of length 1 per training sample.
    seq_lengths = np.array([1] * len(y_train))
    model.fit(X_train, y_train, lengths=seq_lengths)

    predictions = model.predict(X_test)
    scores = [
        accuracy_score(y_test, predictions),
        precision_score(y_test, predictions, average='weighted'),
        recall_score(y_test, predictions, average='weighted'),
        f1_score(y_test, predictions, average='weighted'),
    ]
    return scores
示例#5
0
File: hmm.py  Project: ajvish91/finalhmm
def trainHMM(X_train, y_train):
    """Train a MultinomialHMM treating the whole training set as one
    sequence of length len(y_train).

    Returns the fitted classifier.
    """
    clf = MultinomialHMM()
    print("y shape", y_train.shape[0])  # fixed: Python 2 print statement

    # Fixed: removed the dead `lengths_train` list — it was filled with
    # zeros, printed, and never passed to fit(); the actual lengths
    # argument below models the training set as a single sequence.
    clf.fit(X_train, y_train, [len(y_train)])

    return clf
示例#6
0
def train_and_test_markov(decode, alpha, X_train, y_train,
                          sequence_length_train, X_test, y_test,
                          sequence_length_test, *args, **kwargs):
    """Fit a MultinomialHMM, predict the test sequences, and time both
    phases.

    Returns a tuple (fit_time_ms, predict_time_ms, accuracy_percent).
    Extra *args/**kwargs are accepted and ignored for call-site
    compatibility.
    """
    clf = MultinomialHMM(decode=decode, alpha=alpha)

    t0 = time.time()
    clf.fit(X_train, y_train, sequence_length_train)
    t1 = time.time()
    y_pred = clf.predict(X_test, sequence_length_test)
    t2 = time.time()

    # Times reported in milliseconds, accuracy as a percentage.
    fit_time = (t1 - t0) * 1000
    pred_time = (t2 - t1) * 1000
    accuracy = accuracy_score(y_pred, y_test) * 100
    return (fit_time, pred_time, accuracy)
示例#7
0
def test_hmm():
    """Smoke-test MultinomialHMM on the module-level X / y / lengths:
    the fitted model must reproduce the training labels, and each
    log-probability parameter block must normalise to 1 when
    exponentiated."""
    clf = MultinomialHMM()
    clf.fit(X, y, lengths)

    assert_array_equal(clf.classes_, ["Adj", "DT", "IN", "N", "V"])
    assert_array_equal(clf.predict(X), y)

    n_features = X.shape[1]
    n_classes = len(clf.classes_)
    # Parameters are stored as log-probabilities, so the exponentials
    # must sum to one along the normalised axis.
    assert_array_almost_equal(np.exp(clf.coef_).sum(axis=0),
                              np.ones(n_features))
    assert_array_almost_equal(np.exp(clf.coef_trans_).sum(axis=0),
                              np.ones(n_classes))
    assert_array_almost_equal(np.exp(clf.coef_final_).sum(), 1.)
    assert_array_almost_equal(np.exp(clf.coef_init_).sum(), 1.)
示例#8
0
def train_HMM(X_train, y_train, house, f, i):
    """Train a Viterbi-decoding MultinomialHMM on per-sequence data.

    X_train / y_train arrive as iterables of per-sequence arrays; they
    are flattened with np.concatenate while the original per-sequence
    lengths are passed to fit(). `house`, `f`, `i` are only used for
    the progress message. Returns the fitted classifier.
    """
    clf = MultinomialHMM(decode='viterbi')
    # Fixed: np.array(map(...)) produces a useless 0-d object array on
    # Python 3; a list comprehension yields a proper int array.
    trainLens = np.array([len(x) for x in X_train])

    X_train = np.array(np.concatenate(X_train))
    y_train = np.array(np.concatenate(y_train))

    # (Leftover debug prints of the raw arrays removed.)
    clf.fit(X_train, y_train, trainLens)

    # Fixed: Python 2 print statement -> print() call (same output).
    print(str(i), '. House:', house, '. Feature: ', f, ' training complete.')
    return clf
示例#9
0
    def _hmm(self, ind: Individual, train: Dataset, dev: Dataset):
        """Train a MultinomialHMM configured by `ind` and return its
        predictions on the dev split.

        The two data-related ValueErrors seqlearn emits are converted
        to InvalidPipeline; any other ValueError propagates unchanged.
        """
        train_lengths = [len(sentence) for sentence in train.sentences]
        xtrain, ytrain = train.by_word()

        xdev, _ = dev.by_word()
        dev_lengths = [len(sentence) for sentence in dev.sentences]

        try:
            model = MultinomialHMM(decode=ind.choose('viterbi', 'bestfirst'),
                                   alpha=ind.nextfloat())
            model.fit(xtrain, ytrain, train_lengths)
            return model.predict(xdev, dev_lengths)

        except ValueError as err:
            message = str(err)
            # Bad data becomes InvalidPipeline; genuine bugs re-raise.
            if ('non-negative integers' in message
                    or 'unknown categories' in message):
                raise InvalidPipeline(message)
            raise
示例#10
0
def train_HMM(X_train, y_train, house, f, i):
    """Train a Viterbi-decoding MultinomialHMM on per-sequence data.

    X_train / y_train arrive as iterables of per-sequence arrays; they
    are flattened with np.concatenate while the original per-sequence
    lengths are passed to fit(). `house`, `f`, `i` are only used for
    the progress message. Returns the fitted classifier.
    """
    clf = MultinomialHMM(decode='viterbi')
    # Fixed: np.array(map(...)) produces a useless 0-d object array on
    # Python 3; a list comprehension yields a proper int array.
    trainLens = np.array([len(x) for x in X_train])

    X_train = np.array(np.concatenate(X_train))
    y_train = np.array(np.concatenate(y_train))

    # (Leftover debug prints of the raw arrays removed.)
    clf.fit(X_train, y_train, trainLens)

    # Fixed: Python 2 print statement -> print() call (same output).
    print(str(i), '. House:', house, '. Feature: ', f, ' training complete.')
    return clf
def test_hmm():
    """End-to-end check of MultinomialHMM on the module-level X / y /
    lengths: both decoders must reproduce the training labels, and each
    log-probability parameter block must normalise to 1."""
    clf = MultinomialHMM()
    clf.fit(X, y, lengths)

    assert_array_equal(clf.classes_, ["Adj", "DT", "IN", "N", "V"])
    assert_array_equal(clf.predict(X), y)

    # Best-first decoding must agree with Viterbi on this toy problem.
    clf.set_params(decode="bestfirst")
    assert_array_equal(clf.predict(X), y)

    n_features = X.shape[1]
    n_classes = len(clf.classes_)
    assert_array_almost_equal(np.exp(clf.coef_).sum(axis=0),
                              np.ones(n_features))
    assert_array_almost_equal(np.exp(clf.intercept_trans_).sum(axis=0),
                              np.ones(n_classes))
    assert_array_almost_equal(np.exp(clf.intercept_final_).sum(), 1.)
    assert_array_almost_equal(np.exp(clf.intercept_init_).sum(), 1.)
示例#12
0
def gridSearch(seqs, lens, decodes=(None,), alphas=(None,), init_eq_anys=(None,)):
    """Exhaustive grid search over MultinomialHMM hyper-parameters.

    Tries every (decode, alpha, init_eq_any) combination, scores each
    with crossValidate, and returns (best_classifier, its_fold_scores).
    Returns (None, None) if no combination exceeds 0.0 mean accuracy.

    Fixed: default arguments were mutable lists (shared across calls);
    tuples iterate identically and are safe. The dead commented-out
    duplicate of the search loop was removed.
    """
    maxAcc = 0.0
    maxAccs = None
    bestClf = None
    for decode, alpha, init_eq_any in itertools.product(decodes, alphas,
                                                        init_eq_anys):
        clf = MultinomialHMM(decode=decode, alpha=alpha,
                             init_eq_any=init_eq_any)
        accs = crossValidate(clf, seqs, lens)
        meanAcc = accs.mean()
        if meanAcc > maxAcc:
            maxAcc = meanAcc
            maxAccs = accs
            bestClf = clf
    return bestClf, maxAccs
示例#13
0
import pandas as pd
import numpy as np
from seqlearn.hmm import MultinomialHMM

model = MultinomialHMM(decode='viterbi', alpha=0.01)

# -- training --

training_data = []
training_labels = []
training_data_length = []

dataFile = pd.read_csv("../data/training-leftright-avkfxrmpauHdDpeaAAAa-3.csv",
                       header=0)
data = [dataFile['accX'][:5], dataFile['accY'][:5], dataFile['accZ'][:5]]
#data = [dataFile['alpha'], dataFile['beta'], dataFile['gamma'], dataFile['accX'], dataFile['accY'], dataFile['accZ']]

length = len(dataFile['accX'][:5])
training_data_length.append([length, length,
                             length])  # 3 items because X, Y, Z data

training_data.append(data)
training_labels.append('leftright')

dataFile = pd.read_csv("../data/training-updown-avkfxrmpauHdDpeaAAAa-1.csv",
                       header=0)
data = [dataFile['accX'][:5], dataFile['accY'][:5], dataFile['accZ'][:5]]
#data = [dataFile['alpha'], dataFile['beta'], dataFile['gamma'], dataFile['accX'], dataFile['accY'], dataFile['accZ']]

length = len(dataFile['accX'][:5])
training_data_length.append([length, length,
示例#14
0
    # flatten X_train for HMM function
    X_train_flatten = [item for sublist in X_train for item in sublist]
    X_test_flatten = [item for sublist in X_test for item in sublist]

    # flatten X_train for HMM function
    y_ground_truth_flatten = [
        item for sublist in y_ground_truth_seqlearn for item in sublist
    ]

    # change type to array
    seqlearn_X_train = np.array(X_train_flatten)
    seqlearn_y_ground_truth = np.array(y_ground_truth_flatten)

    # HMM seqlearn MultimodalHMM
    model_seqlearn = MultinomialHMM()

    # training
    model_seqlearn.fit(seqlearn_X_train, seqlearn_y_ground_truth, len_train)

    # state prediction
    y_pred_seqlearn = model_seqlearn.predict(X_test_flatten)

    # print output time remarks
    outputSteps(y_pred_seqlearn)

    #state prediction for random sequence
    y_pred_seqlearn_random = model_seqlearn.predict(X_random)

    # state prediction for heuristic sequence
    y_pred_seqlearn_random = model_seqlearn.predict(X_heuristic)
    X_train = (((X_tr[:, None] & (1 << np.arange(8)))) > 0).astype(
        int)  # vector-> binary matrix
    Y_train = np.array(Y_train)
    # X_test = np.array(X_test).reshape(-1,1)
    X_te = np.array(X_test)
    X_test = (((X_te[:, None] & (1 << np.arange(8)))) > 0).astype(int)
    Y_test = np.array(Y_test)
    return [X_train, X_test, Y_train, Y_test]


# Evaluate a MultinomialHMM with 2-fold sequence-aware cross-validation.
data = load_dataset()
kf = SequenceKFold(seq_lengths(data[1]), 2)
# Fixed: the loop variable shadowed the builtin `tuple`; the 4-element
# folds (train indices, train lengths, test indices, test lengths) are
# now unpacked directly instead of indexed.
for train_idx, train_len, test_idx, test_len in kf:
    split = dataset_split(train_idx, test_idx)

    # Train the model.
    clf = MultinomialHMM()
    clf.fit(split[0], split[2], train_len)

    # Evaluate the model.
    Y_pred = clf.predict(split[1], test_len)
    print('Accuracy:')
    print(clf.score(split[1], split[3], test_len))
    print('Confusion matrix:')
    labels = list(data[2].values())
    print(confusion_matrix(split[3], Y_pred, labels))
    print('Report:')
    target_names = list(data[2].keys())
    print(classification_report(split[3], Y_pred, target_names=target_names))
示例#16
0
def trainMultinomialHMM(data, classes, seq_lengths, dump_file):
    """Train a Viterbi-decoding Multinomial HMM (alpha=0.01) through
    the shared baseSeqClassifierTrain driver, dumping to dump_file."""
    classifier = MultinomialHMM(decode='viterbi', alpha=0.01)
    display_name = "Multinomial Hidden Markov Model"
    baseSeqClassifierTrain(classifier, display_name, data, classes,
                           seq_lengths, dump_file)
示例#17
0
def testMultinomialHMM(data, classes, seq_lengths, n_folds, metric=''):
    """Evaluate a best-first Multinomial HMM (alpha=1.0) with n-fold CV
    through the shared baseSeqClassifierTest driver."""
    classifier = MultinomialHMM(decode='bestfirst', alpha=1.0)
    display_name = "Multinomial Hidden Markov Model"
    baseSeqClassifierTest(classifier, display_name, data, classes,
                          seq_lengths, n_folds, metric)
示例#18
0
# --- subject 1: third training session ---
mat2 = scipy.io.loadmat('train_subject1_psd03.mat')
# Fixed copy-paste bug: X2/Y2 were read from mat1 (the previous
# session) instead of the freshly loaded mat2.
X2 = mat2['X']
Y2 = mat2['Y']

mat_test = scipy.io.loadmat('test_subject1_psd04.mat')
test_X = mat_test['X']
true_label = np.loadtxt('test_subject1_true_label.csv', delimiter=",")

X = mat['X']
Y = mat['Y']

# Stack all three training sessions into one long recording.
new_X = np.concatenate((X, X1, X2), axis=0)
new_Y = np.concatenate((Y, Y1, Y2), axis=0)

clf = MultinomialHMM()
# NOTE(review): seqlearn's fit() expects an iterable of sequence
# lengths; confirm whether a bare int works here or [len(new_X)]
# was intended.
clf.fit(new_X, new_Y, len(new_X))
clf.set_params(decode="bestfirst")
ans = clf.predict(test_X)

# Fixed: Python 2 print statements -> print() calls.
print('sub-1, custom', accuracy_score(ans, true_label))
print(confusion_matrix(true_label, ans))
#1440/3504: subject 1 accuracy
#start subject-2
sub2_1 = scipy.io.loadmat('train_subject2_psd01.mat')
sub2_X1 = sub2_1['X']
sub2_Y1 = sub2_1['Y']

sub2_2 = scipy.io.loadmat('train_subject2_psd02.mat')
sub2_X2 = sub2_2['X']
sub2_Y2 = sub2_2['Y']
示例#19
0
		if (prediction[int(index)] == 0):
			print(n_word)
		elif (prediction[int(index)] == 1):
			print(n_word +"s")
		elif (prediction[int(index)] == 3):
			print(n_word[:-1] + "ies")
		else:
			print(n_word +"es")
	print("ACCURACY" + str(sum(1 for i,j in zip(prediction,target) if i == j)*1.0/len(prediction)))
#just return accuracy for tuning on dev set
def evaluate(prediction, target, input_x):
    """Return the fraction of positions where prediction matches target.

    `input_x` is unused but kept for call-site compatibility. Pairs are
    compared up to the shorter of the two sequences (zip semantics);
    the denominator is always len(prediction).
    """
    matches = sum(p == t for p, t in zip(prediction, target))
    return matches * 1.0 / len(prediction)


#import data from csv file
model = MultinomialHMM()
data = pd.read_csv("weighted_data.csv")


alphabet_dict = dict(zip(string.ascii_lowercase, range(1,27)))
reverse_dict = dict(zip(range(1,27), string.ascii_lowercase))
X = []
Y = []

for index, row in data.iterrows():
	w_class = 4
	singular = row[0]
	plural = row[1]
	singular_without_end = singular[:len(singular)-1]
	#append -es cases
	if (plural == singular + 'es'):
示例#20
0
def hmm_pred(X, y):
    """Hidden Markov model: fit a MultinomialHMM (alpha=0.1) treating
    every sample as its own length-1 sequence, and return it."""
    model = MultinomialHMM(alpha=0.1)
    seq_lengths = np.array([1] * len(y))  # one length-1 sequence per sample
    model.fit(X, y, lengths=seq_lengths)
    return model
def test_hmm_validation():
    """fit() must reject non-positive smoothing parameters."""
    for bad_alpha in (0, -1):
        assert_raises(ValueError, MultinomialHMM(alpha=bad_alpha).fit,
                      X, y, lengths)
示例#22
0
# Helper functions:


# get all the data files from the directory
def getDataFileNames(dataType, movement="", dataFolder=DATA_FOLDER):
    """Return the file names in dataFolder whose names contain both
    dataType and movement (the empty default matches everything)."""
    return [name for name in os.listdir(dataFolder)
            if dataType in name and movement in name]


# ------------------- MAIN ------------------------------------

model = MultinomialHMM(decode='viterbi', alpha=0.01)

# -- training --

training_data = []
training_labels = []
training_data_length = []

files = getDataFileNames("training")
for trainingFile in files:
    dataFile = pd.read_csv(DATA_FOLDER + trainingFile, header=0)
    data = [
        dataFile['accX'][:199], dataFile['accY'][:199], dataFile['accZ'][:199]
    ]
    #data = [dataFile['alpha'], dataFile['beta'], dataFile['gamma'], dataFile['accX'], dataFile['accY'], dataFile['accZ']]
示例#23
0
from seqlearn.hmm import MultinomialHMM
from hmmlearn.hmm import GaussianHMM


# Load PCA-reduced features; each CSV is one recording session.
input_data = pd.read_csv('../data/scaled_data/scaled_pca.csv')
lengths = [len(input_data)]

d1 = pd.read_csv('../data/train_subject1_psd01.csv', header=None)
d2 = pd.read_csv('../data/train_subject1_psd02.csv', header=None)
d3 = pd.read_csv('../data/train_subject1_psd03.csv', header=None)

#input_data = pd.concat([d1, d2, d3], axis=0)
lengths = [len(d1), len(d2), len(d3)]

clf = MultinomialHMM()
clf.fit(input_data.iloc[:, :-1], input_data.iloc[:, -1], lengths)
pred = clf.predict(input_data.iloc[:, :-1])
# NOTE(review): pred spans all of input_data while `actual` holds only
# d3's labels — the element-wise comparison below only lines up if the
# commented-out pd.concat assignment above is restored; verify intent.
actual = d3.iloc[:, -1]
accuracy = sum(pred == actual) / float(len(actual))
print(accuracy)  # fixed: Python 2 print statement -> print() call


# Random Forest



from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn import svm
import pandas as pd
示例#24
0
# Saves the image into a TXT file, one word matrix per block.
for line in line_mapping:
    for word in line:
        if word["matrix"].shape[1] == 0:
            print("Zero matrix... Skipping...")  # fixed: Py2 print statement
        else:
            # Fixed: Python 2 `file()` builtin -> open() inside a context
            # manager so the handle is closed even if savetxt raises.
            with open('test.txt', 'a') as f_handle:
                np.savetxt(f_handle, word['matrix'], delimiter=" ",
                           fmt="%i", newline=" ",
                           header='', footer="" + word["word"] + "\n\n",
                           comments='')

# # Extracts features from the datasets
X_train, y_train, lengths_train = load_conll("test.txt", features)

# # Models it as an HMM
clf = MultinomialHMM()
clf.fit(X_train, y_train, lengths_train)

print(X_train, y_train)  # fixed: Py2 print statement

# Validation after training (re-uses the training file).
X_test, y_test, lengths_test = load_conll("test.txt", features)
y_pred = clf.predict(X_test, lengths_test)

print(y_pred)
# # Final score
# print(bio_f_score(y_test, y_pred))
# Hand-check an unsupervised GaussianHMM (hmmlearn) on the letter data.
model = hmm.GaussianHMM(n_components=5)
model.fit(X, lengths)
test = "apple"
# Map each letter to its alphabet index via the module-level dict.
test_int = [alphabet_dict[let] for let in test]
print(model.predict(np.array([test_int]), lengths=[5]))
print(alphabet_dict)
'''
Documentation of seqlearn: http://larsmans.github.io/seqlearn/reference.html
Some examples of HMMs using seqlearn
seqlearn is supervised leanring vs hmmlearn, which is unsupervised.

'''

model = MultinomialHMM()
# encode

X = pd.DataFrame()
y = pd.DataFrame()
# Label scheme: 0 -> plural = singular, 1 -> plural = singular + 's',
# 2 -> plural = singular + 'es'.
# Data preparation: index the letters such that a = 1, z = 26.
w_class = 0
# Fixed: removed leftover debugging breakpoint ("import pdb;
# pdb.set_trace()") that halted every run of this script.
for index, row in all_data.iterrows():
    singular = row[0]
    plural = row[1]
    if (plural[-2] == 'es'):
        w_class = 2