示例#1
0
    def hmmTrain(self, poiList, lengthList, lenTrainData):
        """Train a multinomial HMM on a user's POI sequences and evaluate it.

        Sequences whose length equals the median sequence length are held
        out as the test set; all remaining sequences form the training set.

        Parameters
        ----------
        poiList : flat list of POI observations — the concatenation of all
            sequences described by ``lengthList``.
        lengthList : per-sequence lengths indexing into ``poiList``.
        lenTrainData : unused; kept for backward compatibility with callers.

        Returns
        -------
        The value of ``self.catePredict`` on the held-out data, or -1 when
        the data is unusable (too short, or test symbols unseen in training).
        """
        poiList = self.mypt.poi2newCate(poiList, self.dictgt)
        if len(poiList) < sum(lengthList):
            self.userState[1] += 1
            return -1

        # Hold out the sequences of median length as the test set.
        # Floor division avoids the out-of-range index the old
        # int(np.ceil(len/2)) produced under Python 3 for odd counts.
        sortedLengths = sorted(lengthList)
        medianLength = sortedLengths[len(sortedLengths) // 2]
        poiTrain, poiTest, lengthNew = [], [], []
        offset = 0
        for length in lengthList:
            chunk = poiList[offset:offset + length]
            if length != medianLength:
                poiTrain.extend(chunk)
                lengthNew.append(length)
            else:
                poiTest.extend(chunk)
            offset += length

        # The test symbols must be a strict subset of the training symbols,
        # otherwise LabelEncoder.transform below would fail on unseen labels.
        if not set(poiTrain) > set(poiTest):
            self.userState[2] += 1
            return -1

        # Encode POI symbols as consecutive integers, as hmmlearn.fit() requires.
        le = preprocessing.LabelEncoder()
        le.fit(list(set(poiTrain)))
        X = np.atleast_2d(le.transform(poiTrain))
        Xtest = np.atleast_2d(le.transform(poiTest))

        # Restart EM several times and keep the best-scoring model.
        # BUG FIX: the original refit a single MultinomialHMM instance, so
        # `modelBest` always aliased the same mutated object and the "best"
        # bookkeeping was meaningless; a fresh model is now created per
        # restart.  Starting from -inf also guarantees modelBest is set on
        # the first iteration (the old -1000000 sentinel could leak through).
        modelBest = None
        scoreHighest = -np.inf
        for _ in range(15):
            remodel = hmm.MultinomialHMM(n_components=6)
            remodel.fit(X.T, lengthNew)
            modelScore = remodel.score(X.T, lengthNew)
            if modelScore > scoreHighest:
                scoreHighest = modelScore
                modelBest = remodel

        # NOTE(review): evaluation still does not handle test samples absent
        # from the training set beyond the subset check above.
        hstate = modelBest.predict(Xtest.T)
        predictTrue = self.catePredict(modelBest.emissionprob_, hstate, Xtest)
        print(predictTrue)
        return predictTrue
示例#2
0
文件: tme8.py 项目: Kabegami/MAPSI
Xgenes  = data.get("genes") # the genes: an array of arrays

Genome = data.get("genome") # the first million bp of E. coli

Annotation = data.get("annotation") ## the annotation over the genome
## 0 = non-coding, 1 = gene on the positive strand

### A few constants
DNA = ["A", "C", "G", "T"]
stop_codons = ["TAA", "TAG", "TGA"]


n_states_m1 = 4
# Python object syntax: create an HMM object
model1 = hmm.MultinomialHMM(n_components =  n_states_m1,init_params='ste')

# Sub-question 1
# The expectation of the geometric law is 1/p, so for p = a we get a = 1/200 bp
# To compute b, we take the a-priori average gene length
a = 1.0 / 200.0
s = 0
# Average gene length over the training genes.
for gene in Xgenes:
    s += len(gene)
s = s / len(Xgenes)
# The average gene length is s; by the same reasoning as for parameter a, b ~ 1/s.
# NOTE(review): the original comment said b = 1/s but the code uses 3/s —
# presumably the factor 3 accounts for codons being nucleotide triplets; confirm.
b = 3.0 / s
print('a : ', a)
print('b : ', b)
#print('Xgenes : ', Xgenes[0])
示例#3
0
    def train(self):
        """Fit a multinomial HMM on the observation sequences in self.p_state.

        Side effects: sets self.Ostate (the sequences), self.Hstate_num
        (distinct-symbol count per sequence), self.n (hidden-state count =
        rounded mean of Hstate_num) and self.model (the fitted HMM).
        """
        # BUG FIX: these were `range(...)` objects, which do not support
        # item assignment under Python 3; use real lists.
        Hstate_num = [0] * len(self.p_state)  # per p_id: number of distinct observed symbols
        Ostate_num = [0] * len(self.p_state)  # per p_id: observation sequence length
        Ostate = []  # collected observation sequences
        print("my_test********************self.p_state")
        print(self.p_state)
        for (index, value) in enumerate(self.p_state):
            # value is one observation sequence, e.g. [[78], [46], [78]]
            Ostate.append(value)
            # Drop the four trailing sentinel entries (0 1 2 3) before
            # counting distinct symbols.
            # NOTE(review): assumes every sequence has at least 4 entries;
            # the slice delete silently removes fewer if not.
            tmp_value = copy.deepcopy(value)
            del tmp_value[-4:]
            # set() keeps only the unique symbols.
            Hstate_num[index] = len(
                set(np.array(tmp_value).reshape(1, len(tmp_value))[0]))
            Ostate_num[index] = len(value)

        self.Ostate = Ostate
        # [[ [78], [46], [78] ] (one generalized request), [another request with the same param md5], ...]
        self.Hstate_num = Hstate_num  # state count per parameter md5
        # Hidden-state count: rounded mean of the per-sequence symbol counts.
        self.n = int(round(np.array(Hstate_num).mean()))
        print("my_test******************隐藏状态数")
        print(self.n)
        model = hmm.MultinomialHMM(n_components=self.n, n_iter=1000, tol=0.01)
        print("my_test************************self.Ostate")
        print(self.Ostate)
        print("my_test************************Ostate_num")
        print(Ostate_num)

        # Concatenate every sequence into one (n_samples, 1) column vector;
        # `lengths` tells hmmlearn where each independent sequence ends.
        t_list = []
        for item in self.Ostate:
            t_list = t_list + item
        model.fit(np.array(t_list).reshape(-1, 1), lengths=Ostate_num)

        self.model = model
        print("my_test**************************m.startprob_")
        print(model.startprob_)
        print("my_test**************************m.transmat_")
        print(model.transmat_)
        print("my_test**************************m.emissionprob_")
        print(model.emissionprob_)
示例#4
0
from hmmlearn import hmm
import numpy as np

# Ground-truth parameters for a 2-state (Healthy / Injured), 3-symbol
# (Dribble / Pass / Shoot) multinomial HMM.
startprob = np.array([1.0, 0.0])
transmat = np.array([[0.7, 0.3], [0.5, 0.5]])
emission_probs = np.array([[0.2, 0.1, 0.7], [0.3, 0.6, 0.1]])

model = hmm.MultinomialHMM(n_components=2)

model.startprob_ = startprob
model.transmat_ = transmat
model.emissionprob_ = emission_probs

# sample the model - X is the observed values (Dribble, Pass & Shoot sequence)
# and Z is the "hidden" states (Healthy & Injured sequence)
samples = 300
iters = 10000
print("With %d samples and %d iterations:" % (samples, iters))
X, Z = model.sample(samples)

# Make an HMM instance and execute fit
# (EM re-estimates the parameters from the sampled observations alone,
# so the fitted matrices can be compared to the ground truth below).
newModel = hmm.MultinomialHMM(n_components=2, n_iter=iters).fit(X)

print("Original Model:")
print("Transition matrix")
print(transmat)
print("Emission probabilities")
print(emission_probs)
print("---------------------------------")
print("Fitted Model:")
print("Transition matrix")
示例#5
0
def main():
    """Spell-correction pipeline: read and clean a text, synthesize a noisy
    copy, fit a keyboard-HMM, Viterbi-decode the noisy test data and report
    per-character confusion counts plus precision/recall.
    """
    ## READ FILE ##
    string = readFile()

    ## CLEAN TEXT ##
    string = cleanString(string)

    ## Keys definition and keyboard arrangement ##
    KEYS, keyboardArrangement = keysAndArrange()

    ## NOISY FILE ##
    cEmissions = noisyFileCreator(string, keyboardArrangement, KEYS)

    ## SPLIT DATA ##
    clean = string.split()
    noisy = open("noisy.txt", "r").read().split()
    X_train, X_test, y_train, y_test = split_data(clean, noisy)

    ## CREATING HMM MODEL ##
    # Uniform start distribution over the 26 keys.
    initial = np.array([1 / 26 for i in KEYS])
    model = hmm.MultinomialHMM(n_components=len(KEYS))
    model.startprob_ = initial
    model.transmat_ = vTransitions(X_train, KEYS)
    model.emissionprob_ = vEmissions(cEmissions)

    ## VITERBI ##
    vResult = viterbie(KEYS, model, y_test)

    def count(corrected_matches, observed_matches):
        """Count character positions where (vResult == X_test) equals
        corrected_matches and (y_test == X_test) equals observed_matches."""
        total = 0
        for i in range(len(X_test)):
            for j in range(len(X_test[i])):
                if (X_test[i][j] == vResult[i][j]) == corrected_matches and \
                   (X_test[i][j] == y_test[i][j]) == observed_matches:
                    total += 1
        return total

    # Decode correct & char was already clean.
    TN = count(True, True)
    print("TRUE NEGATIVE = " + str(TN))

    # Decode wrong & char was already clean (the decoder corrupted it).
    FP = count(False, True)
    print("FALSE POSITIVE = " + str(FP))

    # Decode correct & char was noisy (a genuine correction).
    TP = count(True, False)
    print("TRUE POSITIVE = " + str(TP))

    # Decode wrong & char was noisy (a missed correction).
    FN = count(False, False)
    print("FALSE NEGATIVE = " + str(FN))
    print()

    ## PRECISION / RECALL ##
    # BUG FIX: the original swapped the two formulas.  Precision is
    # TP / (TP + FP); recall is TP / (TP + FN).
    print("PRECISION = " + str(TP / (TP + FP)))
    print("RECALL = " + str(TP / (TP + FN)))
示例#6
0
with warnings.catch_warnings():
    warnings.filterwarnings("ignore",category=DeprecationWarning)
    from hmmlearn import hmm

import lang
import utils

dictionary, X, lengths = utils.parseInput()

trainData = np.array([X]).reshape(-1, 1)

try:
    model = joblib.load("model.pkl")
except:
    model = hmm.MultinomialHMM(n_components=10, n_iter=3, verbose=True)
    model.fit(trainData, lengths=lengths)

joblib.dump(model, "model.pkl")

def printSonnet(sonnet):
    """Pretty-print a sonnet as LaTeX, colouring each word by its hidden state.

    sonnet is a sequence of lines, each line a sequence of (word, state)
    pairs.  Lines after index 11 (the closing couplet) are indented.
    """
    for i, line in enumerate(sonnet):
        padding = ''
        if i > 11:
            padding = '  '
        words = ["\\textcolor{" + str(s) + "}{" + w + "}" for w, s in line]
        print(padding + ' '.join(words))
    # BUG FIX: the original ended with the Python 2 statement `print ' '`,
    # a SyntaxError in this otherwise Python 3 file.
    print(' ')
示例#7
0
import numpy as np

from hmmlearn import hmm

# Set random seed for reproducibility
np.random.seed(1000)


if __name__ == '__main__':
    # Create a Multinomial HMM
    hmm_model = hmm.MultinomialHMM(n_components=2, n_iter=100, random_state=1000)

    # Define a list of observations
    observations = np.array([[0], [1], [1], [0], [1], [1], [1], [0], [1],
                             [0], [0], [0], [1], [0], [1], [1], [0], [1],
                             [0], [0], [1], [0], [1], [0], [0], [0], [1],
                             [0], [1], [0], [1], [0], [0], [0], [0], [0]], dtype=np.int32)

    # Fit the model using the Forward-Backward algorithm
    hmm_model.fit(observations)

    # Check the convergence
    print('Converged: {}'.format(hmm_model.monitor_.converged))

    # Print the transition probability matrix
    print('\nTransition probability matrix:')
    print(hmm_model.transmat_)

    # Create a test sequence
    sequence = np.array([[1], [1], [1], [0], [1], [1], [1], [0], [1],
                         [0], [1], [0], [1], [0], [1], [1], [0], [1],
示例#8
0
        nearestL = 99
        # Iterate over number_k_train on the right side
        for t in range(0, centerL.__len__()):
            # Calculate distance between points, test and train
            distL = math.sqrt((ZL[i, 0] - centerL[t, 0])**2 +
                              (ZL[i, 1] - centerL[t, 1])**2)
            # Get the nearest distance
            if distL < nearestL:
                nearestL = distL
                aux = t
        secuenceL.append(aux)
    full_secuenceL[s] = secuenceL

# Right model

modelR = hmm.MultinomialHMM(2, verbose=True, n_iter=20)

# BUG FIX: hmmlearn reads the trailing-underscore attributes (startprob_,
# transmat_, emissionprob_); the original assigned to start_probability /
# transition_probability / emissionprob, which hmmlearn silently ignores.
# NOTE(review): fit() still re-initializes these unless init_params is
# restricted, since MultinomialHMM defaults to init_params='ste'.
modelR.startprob_ = np.array([0.6, 0.4])  # arbitrary initial values
modelR.transmat_ = np.array([[0.5, 0.5],
                             [0.5, 0.5]])  # arbitrary initial values
modelR.emissionprob_ = np.array([[0.1, 0.5, 0.4],
                                 [0.5, 0.3, 0.2]])  # arbitrary initial values

# Flatten the per-sequence observations into the (n_samples, 1) column
# vector shape that hmmlearn expects; lengthsL marks sequence boundaries.
X = np.asarray(full_secuenceL)

#lengths = list(map(lambda x : len(x), X))
X = np.hstack(X)
X = X.reshape(len(X), 1)
modelR.fit(X, lengthsL)
示例#9
0
def Question3():
    """Train two 2-state multinomial HMMs on complementary samples and
    compare the probabilities they assign to aababbb and bbabaaa.

    Observations are encoded as 0 = 'a', 1 = 'b'.
    """
    ########################################################
    # 1. Create HMM with the library
    ########################################################
    states = ["State_1", "State_2"]
    n_states = len(states)

    observations = ["O1", "O2"]
    n_observations = len(observations)
    # init_params="" keeps the manually assigned start/transition/emission
    # parameters as EM's starting point instead of random ones.
    model = hmm.MultinomialHMM(n_components=n_states,
                               init_params="",
                               n_iter=50,
                               algorithm='map',
                               tol=0.00001)
    model.startprob_ = np.array([0.31, 0.69])
    model.transmat_ = np.array([[0.40, 0.60], [0.52, 0.48]])
    model.emissionprob_ = np.array([[0.49, 0.51], [0.40, 0.60]])

    ########################################################
    # 2. Learn the HMM with the sample L1 = {aaabb, abaabbb, aaababb, aabab, ab}
    ########################################################
    sequence1 = np.array([[0, 0, 0, 1, 1]]).T
    sequence2 = np.array([[0, 1, 0, 0, 1, 1, 1]]).T
    sequence3 = np.array([[0, 0, 0, 1, 0, 1, 1]]).T
    sequence4 = np.array([[0, 0, 1, 0, 1]]).T
    sequence5 = np.array([[0, 1]]).T
    sequences = [sequence1, sequence2, sequence3, sequence4, sequence5]
    sample = np.concatenate(sequences)
    print("sample: ", sample)
    lengths = [len(s) for s in sequences]
    model.fit(sample, lengths)
    # Model obtained after training:
    print(model.transmat_)
    print(model.startprob_)
    print(model.emissionprob_)
    #######################################################
    # 3. Same procedure on the complementary sample
    #######################################################
    model2 = hmm.MultinomialHMM(n_components=n_states,
                                init_params="",
                                n_iter=50,
                                algorithm='map',
                                tol=0.00001)
    model2.startprob_ = np.array([0.31, 0.69])
    model2.transmat_ = np.array([[0.40, 0.60], [0.52, 0.48]])
    model2.emissionprob_ = np.array([[0.49, 0.51], [0.40, 0.60]])

    sequence1 = np.array([[1, 1, 1, 0, 0]]).T
    sequence2 = np.array([[1, 0, 1, 1, 0, 0]]).T
    sequence3 = np.array([[1, 1, 1, 0, 1, 0, 0]]).T
    sequence4 = np.array([[1, 1, 0, 1, 1, 0]]).T
    sequence5 = np.array([[1, 1, 0, 0]]).T
    sequences = [sequence1, sequence2, sequence3, sequence4, sequence5]
    sample = np.concatenate(sequences)
    lengths = [len(s) for s in sequences]
    model2.fit(sample, lengths)
    print(model2.transmat_)
    print(model2.startprob_)
    print(model2.emissionprob_)

    #######################################################
    # 5. Compute the probabilities of the strings aababbb and bbabaaa.
    #######################################################
    sequence_1 = np.array([[0, 0, 1, 0, 1, 1, 1]]).T  # aababbb
    sequence_2 = np.array([[1, 1, 0, 1, 0, 0, 0]]).T  # bbabaaa
    # BUG FIX: the original scored sequence1/sequence2 — training sequences
    # already rebound by section 3 — so sequence_1/sequence_2 were defined
    # but never used.  Score the intended strings instead.
    p_extend = model.score(sequence_1)
    print("prob for first model first sequence: ", np.exp(p_extend))

    p_extend = model.score(sequence_2)
    print("prob for first model second sequence: ", np.exp(p_extend))

    p_extend = model2.score(sequence_1)
    print("prob for second model fist sequence: ", np.exp(p_extend))

    p_extend = model2.score(sequence_2)
    print("prob for second model second sequence: ", np.exp(p_extend))
示例#10
0
    # for this example
    # n_components: 3   对应三个色子
    # n_features: 1     理解一下, 这里feature和symbol是不一样的含义. symbol对应色子面的八种状态
    # 初始概率: 三个色子随机抽取
    startprob = np.ones(3)
    startprob /= startprob.sum()
    # 转移矩阵: 机会均等
    transmat = np.ones((3, 3))
    transmat /= transmat.sum(axis=1)
    # 观测矩阵: 和色子面数有关系
    emissionprob = np.array([[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0],
                             [1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0],
                             [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]])
    emissionprob /= emissionprob.sum(axis=1, keepdims=True)

    hmmdice = hmm.MultinomialHMM(n_components=3, algorithm="map")
    hmmdice.startprob_ = startprob
    hmmdice.transmat_ = transmat
    hmmdice.emissionprob_ = emissionprob
    X = np.array([1, 6, 3, 5, 2, 7, 3, 5, 2, 4, 3, 6, 1, 5, 4]).reshape(-1, 1)
    # 效果一样
    # X = np.array([[1, 6, 3, 5, 2, 7, 3, 5, 2, 4, 3, 6, 1, 5, 4]]).T
    # 问题A
    prob, rst = hmmdice.decode(X)
    logger.info("\n%s" % hmmdice.startprob_)
    logger.info("\n%s" % hmmdice.transmat_)
    logger.info("\n%s" % hmmdice.emissionprob_)
    logger.info(hmmdice.predict(X))
    # 问题B
    logger.info(prob)
    logger.info(rst)
示例#11
0
#-*-coding: utf-8 -*-
#@author:tyhj
from __future__ import division
import numpy as np
from hmmlearn import hmm

states = ['Rainy', 'Sunny']
n_states = len(states)

observations = ['walk', 'shop', 'clean']
n_observations = len(observations)

start_probability = np.array([0.6, 0.4])

transition_probability = np.array([[0.7, 0.3], [0.4, 0.6]])

emission_probability = np.array([[0.1, 0.4, 0.5], [0.6, 0.3, 0.1]])

model = hmm.MultinomialHMM(n_components=n_states, init_params='')
model.startprob_ = start_probability
model.transmat_ = transition_probability
model.emissionprob_ = emission_probability

#predict a sequence of hidden states based on visible states
bob_says = np.array([[0, 2, 1, 1, 2, 0]]).T

model = model.fit(bob_says)
logprob, alice_hears = model.decode(bob_says, algorithm='viterbi')
print 'Bob says:', ','.join(map(lambda x: observations[x], bob_says))
print 'Alice hears:', ','.join(map(lambda x: states[x], alice_hears))
for i in families:
    X = dataset.iloc[dataSelect2[i][0]:dataSelect2[i][1], 34:]
    Y = dataset.iloc[dataSelect2[i][0]:dataSelect2[i][1], 1]
    X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                        Y,
                                                        test_size=0.25,
                                                        random_state=23)
    count += X_test.shape[0]

    testX = np.append(testX, X_test).reshape(count, 1000)
    testY = np.append(testY, Y_test)

    trainX = np.append(trainX, X_train).reshape(-1, 1000)
    trainY = np.append(trainY, Y_train)

    model = hmm.MultinomialHMM(n_components=10, n_iter=200, tol=0.5)
    model.fit(X_train)
    all_models.append(model)
    print("done")

le = LabelEncoder()
testY = le.fit_transform(testY)

filename = 'finalized_model.sav'
pickle.dump(all_models, open(filename, 'wb'))

filename = 'X_test.sav'
pickle.dump(testX, open(filename, 'wb'))

filename = 'Y_test.sav'
pickle.dump(testY, open(filename, 'wb'))
# X_d = np.asarray(list(map(int, X_d)))
X_d = X_d.reshape((len(X_d), 1))

#For the very moment we are creating a hidden state for every possible location within the shop. Later we can think of narrowing it down
hidden_states = {}
count = 0
for x in range(-46, 31): #XRange
	for y in range(-10, 45): #YRange
		hidden_states[count] = (x, y)
		count += 1
n_hidden_states = len(hidden_states)

# model = hmm.MultinomialHMM(n_components=n_hidden_states).fit(np.atleast_2d(X_d).T, len_samples)
print(len_samples)
print('Starting Training...')
model = hmm.MultinomialHMM(n_components=n_hidden_states).fit(X_d, len_samples)
print('Finished Training!')
model.monitor_
model.monitor_.converged
filename = 'model_all.pkl'
joblib.dump(model, filename)
stop = timeit.default_timer()
print(stop-start)

df_test = pd.read_csv('modified_training_day_log.csv')
for i in range(len(macs)):
    # df1 = df_test[(df_test['controllerid'] == int(cid)) & (df_test['mac'] == macs[i])]
    df1 = df_test[(df_test['mac'] == macs[i])]
    df1.reset_index(inplace=True, drop=True)
    X = np.asarray(df1['pwr'])
    X_d = le.transform(X)
示例#14
0
文件: main.py 项目: wtong98/BayesClef
if TYPE_GENERATOR == 'hmm':
    if not os.path.exists(HMM_PATH):
        word_to_label = {}

        vocab = score_word_to_vec.vocab()
        for word in vocab:
            idx = vocab[word].index
            word_to_label[word] = labels[idx]

        def _text_to_seq(text):
            return np.array([[word_to_label[word]] for word in text])

        sequences = [_text_to_seq(text) for text in myScoreToWord.scores]

        # Now actually train
        type_gen_model = hmm.MultinomialHMM(n_components=16)
        lengths = [len(seq) for seq in sequences]
        sequences = np.concatenate(sequences)
        type_gen_model.fit(sequences, lengths=lengths)
        print(type_gen_model.transmat_)
        with open(HMM_PATH, "wb") as file:
            pickle.dump(type_gen_model, file)
    else:
        type_gen_model = None
        with open(HMM_PATH, "rb") as file:
            type_gen_model = pickle.load(file)
elif TYPE_GENERATOR == 'gru':
    if not os.path.exists(GRU_PATH):
        word_to_label = {}
        vocab = score_word_to_vec.vocab()
        for word in vocab:
示例#15
0
#coding=utf-8
'''
Created on 2018-1-22

@author: 10205025
'''
import numpy as np
from hmmlearn import hmm

# Multinomial HMM with 3 hidden states.
# NOTE(review): the original comment claimed 5 hidden states, but
# n_components below is 3 — the comment was stale.
model = hmm.MultinomialHMM(n_components=3,
                           verbose=True,
                           n_iter=1000,
                           tol=0.001)
# model = hmm.GaussianHMM(n_components=3, n_iter=1000, tol=0.1,covariance_type="full", verbose=True)

# Five observation sequences over the symbols {0, 1, 2}.
X1 = np.array([[2], [1], [0]])
X2 = np.array([[2], [1], [0], [2]])
X3 = np.array([[2], [1], [1]])
X4 = np.array([[2], [1], [0]])
X5 = np.array([[1], [2], [0]])

# Stack the sequences into the single (n_samples, 1) column hmmlearn expects.
X = np.vstack((X1, X2, X3, X4, X5))
print(X)
# Expected output (concatenated column vector):
# [[2]
#  [1]
#  [0]
#  [2]
#  [1]
#  [0]
#  [2]
示例#16
0
def Question2():
    """Score the string abbaa under a fixed 3-state HMM, then re-estimate the
    model with Baum-Welch for 1, 15 and ~converged iterations, and finally
    with a 5-state model, reporting the string's probability each time.

    Observation encoding: 0 = 'a', 1 = 'b'.
    NOTE(review): the emission matrices have two columns (two symbols) even
    though `observations` lists three names; n_observations is unused —
    presumably leftover boilerplate.
    """
    #1. Compute the probability of the string abbaa
    states = ["State_1", "State_2", "State_3"]
    n_states = len(states)

    observations = ["O1", "O2", "O3"]
    n_observations = len(observations)
    model = hmm.MultinomialHMM(n_components=n_states,
                               n_iter=10,
                               algorithm='map',
                               tol=0.00001)
    model.startprob_ = np.array([0.5, 0.3, 0.2])
    model.transmat_ = np.array([[0.45, 0.35, 0.20], [0.10, 0.50, 0.40],
                                [0.15, 0.25, 0.60]])
    model.emissionprob_ = np.array([[1, 0], [0.5, 0.5], [0, 1]])

    # abbaa encoded as 0/1, shaped (5, 1) as hmmlearn expects.
    sequence1 = np.array([[0, 1, 1, 0, 0]]).T
    logproba = model.score(sequence1)
    print("log probability of the string abbaa: ", logproba)
    print("probability of the string abbaa: ", np.exp(logproba))

    #############################################################################

    #2. Apply BaumWelch with only one iteration and check the probability of the string
    # init_params="" keeps the manual parameters as EM's starting point.
    model = hmm.MultinomialHMM(n_components=n_states,
                               init_params="",
                               n_iter=1,
                               algorithm='map',
                               tol=0.00001)
    model.startprob_ = np.array([0.5, 0.3, 0.2])
    model.transmat_ = np.array([[0.45, 0.35, 0.20], [0.10, 0.50, 0.40],
                                [0.15, 0.25, 0.60]])
    model.emissionprob_ = np.array([[1, 0], [0.5, 0.5], [0, 1]])

    model.fit(sequence1)
    p_extend = model.score(sequence1)
    print("log One Iteration BaumWelch: ", p_extend)
    print("One Iteration BaumWelch: ", np.exp(p_extend))
    ###############################################################################
    #3. Do the same thing after 15 iterations
    ###############################################################################
    model = hmm.MultinomialHMM(n_components=n_states,
                               init_params="",
                               n_iter=15,
                               algorithm='map',
                               tol=0.00001)
    model.startprob_ = np.array([0.5, 0.3, 0.2])
    model.transmat_ = np.array([[0.45, 0.35, 0.20], [0.10, 0.50, 0.40],
                                [0.15, 0.25, 0.60]])
    model.emissionprob_ = np.array([[1, 0], [0.5, 0.5], [0, 1]])

    model.fit(sequence1)
    p_extend = model.score(sequence1)
    print("log 15 Iterations BaumWelch: ", (p_extend))
    print("15 Iterations BaumWelch: ", np.exp(p_extend))
    ###############################################################################
    #4. Try to obtain the result at convergence
    ###############################################################################
    # Many iterations with a tight tolerance so EM stops at convergence.
    model4 = hmm.MultinomialHMM(n_components=n_states,
                                init_params="",
                                n_iter=150,
                                algorithm='map',
                                tol=0.00000001)
    model4.startprob_ = np.array([0.5, 0.3, 0.2])
    model4.transmat_ = np.array([[0.45, 0.35, 0.20], [0.10, 0.50, 0.40],
                                 [0.15, 0.25, 0.60]])
    model4.emissionprob_ = np.array([[1, 0], [0.5, 0.5], [0, 1]])

    model4.fit(sequence1)
    p_extend = model4.score(sequence1)
    print("log At convergence BaumWelch: ", (p_extend))
    print("At convergence BaumWelch: ", np.exp(p_extend))
    ###############################################################################
    #5. Now create an HMM with 5 states with parameters initialized at any non zero correct values.
    ###############################################################################
    model = hmm.MultinomialHMM(n_components=5,
                               init_params="",
                               n_iter=120,
                               algorithm='map',
                               tol=0.00000001)
    model.startprob_ = np.array([0.5, 0.2, 0.1, 0.1, 0.1])
    model.transmat_ = np.array([[0.40, 0.30, 0.10, 0.10, 0.10],
                                [0.5, 0.10, 0.30, 0.05, 0.05],
                                [0.10, 0.20, 0.60, 0.05, 0.05],
                                [0.30, 0.40, 0.10, 0.10, 0.10],
                                [0.25, 0.25, 0.25, 0.10, 0.15]])
    model.emissionprob_ = np.array([[1, 0], [0.5, 0.5], [0.5, 0.5], [0.5, 0.5],
                                    [0, 1]])

    model.fit(sequence1)
    p_extend = model.score(sequence1)
    print("log At 5 states BaumWelch: ", (p_extend))
    print("At 5 states BaumWelch: ", np.exp(p_extend))
示例#17
0
    train.append(trainsample[:800])
    test.append(trainsample[800:])

train = np.asarray(train)
test = np.asarray(test)
test = np.concatenate(test, axis=0)
#test = np.ravel(test)
print(train.shape, test.shape)
hidden_state = 2
# symbols = 26
# pi = np.random.dirichlet(np.ones(hidden_state), size=1)
# A = np.random.dirichlet(np.ones(hidden_state), size=hidden_state)
# B = np.random.dirichlet(np.ones(symbols), size=hidden_state)

model = hmm.MultinomialHMM(n_components=hidden_state,
                           init_params='ste',
                           n_iter=10)
# model.startprob_ = pi
# model.transmat_ = A
# model.emissionprob_ = B
score = []
counter = 0
for cipher in train:
    input = np.concatenate(cipher, axis=0)

    print("number of ciphertxt: ", counter)
    hidden_state = 26
    # symbols = len(input)
    # pi = np.random.dirichlet(np.ones(hidden_state), size=1)
    # A = np.random.dirichlet(np.ones(hidden_state), size=hidden_state)
    # B = np.random.dirichlet(np.ones(symbols), size=hidden_state)
示例#18
0
def repeat_hmm_cv_simulation(X_fit, par_rep, n_cell, n_bin, n_trial, n_components=4, n_folds=5, n_iter=500, tol=1e-05, matlab_=False, eng=None, bin_size=1, time_state=np.array([1,1,1]), options=None, path_matlab=None):
    """Repeatedly fit a multinomial HMM with k-fold cross-validation and
    keep the repetition with the best mean BIC (Python 2 code).

    Each entry of par_rep seeds one repetition; within a repetition the
    trials are split into n_folds folds, the model is fit on the training
    trials and evaluated trial-by-trial on the held-out ones.  When
    matlab_ is True, fitting/decoding is delegated to MATLAB helpers
    (hmmtrain_matlab / hmmdecode_viterbi_matlab) instead of hmmlearn.

    NOTE(review): the default time_state=np.array([1,1,1]) is a mutable
    default shared across calls — safe only if callers never mutate it.
    NOTE(review): n_cell and options are unused here except that options
    is only referenced by the commented-out score_firing_rate line.

    Returns (model, Z, state_prob, sum_stable_state, logprob, bic,
    score_rep, last_score) for the best-scoring repetition.
    """
    n_rep = len(par_rep) 
    n_symbols = len(np.unique(X_fit))
    symbols = np.unique(X_fit)
    score_rep = np.zeros(n_rep) #BIC or loglikelihood
    last_score = -np.inf
    
    for i_rep in range(n_rep):
        random_state = par_rep[i_rep]
        init_params = 'e'
        # Priors: start in state 0; emission/transition priors are a
        # noisy identity-like structure, renormalized row-wise below.
        startprob_prior = np.concatenate([[1], np.zeros(n_components-1)])
        emissionprob_prior = np.concatenate([np.ones([n_components,1]), np.zeros([n_components, n_symbols-1])], axis=1) + np.abs(np.random.randn(n_components, n_symbols)*.2)
        transmat_prior = np.identity(n_components) + np.abs(np.random.randn(n_components, n_components)*.2)
        for i in range(n_components):
            emissionprob_prior[i] = summingto1(emissionprob_prior[i])
            transmat_prior[i] = summingto1(transmat_prior[i])
        
        # n_folds == 1 degenerates to train == test on all trials.
        if n_folds == 1:
            kf = [(range(n_trial), range(n_trial))]
        else:
            kf = KFold(n_trial, n_folds=n_folds, shuffle=True, random_state=random_state)

        if not matlab_: 
            model_ = hmm.MultinomialHMM(n_components=n_components, n_iter=n_iter, random_state=random_state, startprob_prior=startprob_prior, transmat_prior=transmat_prior, tol=tol, init_params=init_params, algorithm='viterbi')        
        else:
            model_ = None
        
        logprob_pertrial = []
        logprob_pertrial += [[] for _ in range(n_folds)]
        
        logprob_ = np.zeros(n_folds)
        bic_ = np.zeros(n_folds)
        score_fr_ = np.zeros(n_folds)
        sum_stable_state_ = np.zeros([n_folds, n_components, time_state.shape[0]])
        # Z_ holds the decoded state per bin; -1 marks "not yet tested".
        Z_ = np.zeros(n_trial*n_bin) - 1
        state_prob_ = np.zeros([n_trial*n_bin, n_components])
        for i_fold, (train_index, test_index) in enumerate(kf):
            ######## fit
            # Trials are laid out contiguously, n_bin bins per trial.
            trial_train_index = []
            trial_train_index += [range(i_trial*n_bin,(i_trial+1)*n_bin) for i_trial in train_index]
            if not matlab_:
                #------------- hmmlearn
                trial_train_index = np.hstack(trial_train_index)
                length = np.repeat(n_bin, len(train_index))
                model_.fit(X_fit[trial_train_index], length)
                # Column 0 is presumably the "no spike" symbol — TODO confirm.
                pred_fr = model_.emissionprob_[:,1:] / bin_size
            else:
                #----------- hmmtrain
                X_fit_matrix = []
                X_fit_matrix += [X_fit[i] for i in trial_train_index]
                X_fit_matrix = np.array(X_fit_matrix)
                print "Spoken cell i_fold: ", np.unique(X_fit_matrix)
                transmat, emissionprob = hmmtrain_matlab(X_fit_matrix, transmat_prior, emissionprob_prior, symbols, tol, n_iter, eng, path_matlab)
                pred_fr = emissionprob[:,1:] / bin_size
                trial_train_index = np.hstack(trial_train_index)
            ######## predict
            trial_test_index = []
            trial_test_index += [range(i_trial*n_bin,(i_trial+1)*n_bin) for i_trial in test_index]
            print "rep:%d --- fold:%d" % (i_rep, i_fold)
            #print "Train trial: ", train_index
            #print "Test trial: ", test_index
            if not matlab_:
                #------------hmmlearn
                #trial_test_index = np.hstack(trial_test_index)
                #length_test = np.repeat(n_bin, len(test_index))
                #X_predict = X_fit[trial_test_index]
                #Z_[trial_test_index] = model_.predict(X_predict, length_test)
                #logprob_[i_fold], state_prob_[trial_test_index,:] = model_.score_samples(X_predict, length_test)
                ####### One trial per time
                for i, i_trial in enumerate(test_index):
                    Z_[trial_test_index[i]] = model_.predict(X_fit[trial_test_index[i]])
                    tmp, state_prob_[trial_test_index[i],:] = model_.score_samples(X_fit[trial_test_index[i]])
                    logprob_pertrial[i_fold] += [tmp]
                logprob_[i_fold] = np.mean(logprob_pertrial[i_fold])
                trial_test_index = np.hstack(trial_test_index)
            else:
                #-----------hmmtrain
                for i, i_trial in enumerate(test_index):
                    state_prob_[trial_test_index[i],:], tmp, Z_[trial_test_index[i]] = hmmdecode_viterbi_matlab(X_fit[trial_test_index[i]], transmat, emissionprob, symbols, eng, path_matlab)
                    logprob_pertrial[i_fold] += [tmp]
                    #Z_[trial_test_index[i]] = hmmviterbi_matlab(X_fit[trial_test_index[i]], transmat, emissionprob, symbols, eng)
                logprob_[i_fold] = np.mean(logprob_pertrial[i_fold])       
                trial_test_index = np.hstack(trial_test_index)
            print "Still to test: ", np.sum(Z_ == -1)
            print "check train/test: ", np.unique(np.diff(np.sort(np.concatenate([trial_test_index,trial_train_index]))))            
            #score_fr_[i_fold] = score_firing_rate(options, pred_fr)
            # BIC-style penalty on the per-fold log-likelihood.
            bic_[i_fold] = logprob_[i_fold] - ((n_components**2 + n_components*(n_symbols-2)) / 2 * np.log(n_bin*len(test_index)))
            sum_stable_state_[i_fold,:,:] = stability_matrix(state_prob_[trial_test_index,:], Z_[trial_test_index], n_bin, len(test_index), bin_size, time_state)
                
        score_rep[i_rep] = np.mean(bic_)
        print "Score BIC: ", score_rep[i_rep]
        print "Score firing rate: ", score_fr_.mean()
        print "Stable state: ", squeeze_stable_state(sum_stable_state_)*100
        # Keep the outputs of the best repetition so far.
        if score_rep[i_rep] > last_score: #change variable to mean according to the score measure
            if matlab_:
                Z_ = Z_ - 1#matlab start from 1
            Z = Z_
            logprob = logprob_
            state_prob = state_prob_
            bic = bic_
            model = model_
            sum_stable_state = sum_stable_state_
            score_fr = score_fr_
            last_score = score_rep[i_rep]
    
    print "-----------------------------------"
    print "Mean BIC: ", last_score
    print "Mean score firing rate: ", np.mean(score_fr)
    print "std score firing rate: ", np.std(score_fr)
    print "logLik: ", logprob
    print "BIC: ", bic
    #print "stable state sum: ", sum_stable_state
    
    return model, Z, state_prob, sum_stable_state, logprob, bic, score_rep, last_score
示例#19
0
# Initial state distribution (presumably over three "boxes" — the
# classic ball-and-box HMM demo; n_states/states/observations come from
# earlier, unseen code).
start_probability = np.array([0.2, 0.4, 0.4])

transition_probability = np.array([
  [0.5, 0.2, 0.3],
  [0.3, 0.5, 0.2],
  [0.2, 0.3, 0.5]
])

emission_probability = np.array([
  [0.5, 0.5],
  [0.4, 0.6],
  [0.7, 0.3]
])

model = hmm.MultinomialHMM(n_components=n_states)
model.startprob_=start_probability
model.transmat_=transition_probability
model.emissionprob_=emission_probability



# Decoding problem: given the model parameters and an observation
# sequence, find the most likely hidden-state sequence.
  # Method 1: decode
seen = np.array([[0,1,0]]).T
logprob, box = model.decode(seen, algorithm="viterbi")
print("The ball picked:", ", ".join(map(lambda x: observations[x], seen)))  # the given observation sequence
print("The hidden box:", ", ".join(map(lambda x: states[x], box)))    # the most likely hidden-state sequence

  # Method 2: predict
box2 = model.predict(seen)
示例#20
0
		if seq[i] == "D":
			seq_data.append([2])
	return seq_data

# Hidden-state and observation alphabets (three symbols each).
states = ["a", "b", "d"]
n_states = 3
obs = ["A", "B", "D"]
n_obs = 3

# Near-uniform initial state distribution (sums to exactly 1.0).
start_arr = numpy.array([0.3333,0.3333,0.3334])
# NOTE(review): trans_mat and emiss_mat are built from the same counts
# AA..DD — confirm the emission matrix is really meant to equal the
# transition matrix here.
trans_mat = numpy.array([[AA,AB,AD],[BA,BB,BD],[DA,DB,DD]])
emiss_mat = numpy.array([[AA,AB,AD],[BA,BB,BD],[DA,DB,DD]])

#print(trans_mat)

# init_params="" stops fit() from re-initialising the manually assigned
# startprob_/transmat_/emissionprob_ below.
model = hmm.MultinomialHMM(n_components = n_states, verbose = True, n_iter = int(n_itera), tol = 0.001, init_params = "")
model.startprob_ = start_arr
model.transmat_ = trans_mat
model.emissionprob_ = emiss_mat

# Row-normalise so every row is a valid probability distribution.
model.transmat_ = model.transmat_ / model.transmat_.sum(axis=1)[:, numpy.newaxis]
model.emissionprob_ = model.emissionprob_ / model.emissionprob_.sum(axis=1)[:, numpy.newaxis]

#path = opath + "/" + timetag +  "/strings-raw.txt"

#print(path)

#print(type(emiss_mat))

#f = open(path)
lines = all_lines
示例#21
0
# Build per-person annotation timelines over a common time axis.
listA = getTimelineAnnotation(label_person_A, stepsize, timestamp_first, timestamp_last)
listB = getTimelineAnnotation(label_person_B, stepsize, timestamp_first, timestamp_last)

# X = concatenated observation sequences from both actors;
# lengths_list records where one actor's sequence ends and the next begins.
X = listA + listB
lengths_list = [len(listA), len(listB)]

# HMM trained on both actors' sequences together.
# NOTE(review): fit/predict are called without lengths=lengths_list, so
# hmmlearn treats X as one continuous sequence — pass the lengths if the
# two actors' data should be modelled as separate sequences.
hmm_all = hmm.MultinomialHMM(n_components=num_components)
model_all = hmm_all.fit(X)
prediction_all = hmm_all.predict(X)
decode_all = hmm_all.decode(X)

# HMM trained on actor A's sequence only.
hmm_a = hmm.MultinomialHMM(n_components=num_components)
model_a = hmm_a.fit(listA)
prediction_a = hmm_a.predict(listA)

# HMM trained on actor B's sequence only.
# Bug fix: previously this model was fit on (and predicted) the combined
# X, duplicating hmm_all instead of modelling actor B like hmm_a above.
hmm_b = hmm.MultinomialHMM(n_components=num_components)
model_b = hmm_b.fit(listB)
prediction_b = hmm_b.predict(listB)

customers_same = []
all_same = []
overall_graph = []
示例#22
0
# Example of fitted HMM and sampling
# 1- we create an HMM fitting it from data
# 2- we extract some samples from the fitted model

import numpy as np
from hmmlearn import hmm
np.random.seed(42)

states = ["Rainy", "Sunny"]
n_states = len(states)

observations = ["walk", "shop", "clean"]
n_observations = len(observations)

# Single training sequence of indices into `observations`.
train_data = [0, 2, 1, 1, 2, 0, 1, 2, 0, 0, 0, 0, 0, 0]

# Fit a 2-state discrete-emission HMM; hmmlearn expects a column vector
# of symbol indices, hence the transpose.
model = hmm.MultinomialHMM(n_components=n_states, n_iter=100)
model.fit(np.array([train_data]).T)

# Learned parameters (Python 2 print statements).
print "start probs: ", model.startprob_
print "transmat: ", model.transmat_
print "emissionprob_", model.emissionprob_

# Draw 5 samples: X holds observation indices, Z the hidden states.
X, Z = model.sample(5)
print "X:", X
print "Z:", Z

# Map indices back to human-readable labels.
print "States:", ", ".join(map(lambda x: states[x], Z))
print "Performed actions:", ", ".join(
    map(lambda x: observations[x], np.squeeze(np.asarray(X))))
示例#23
0
from hmmlearn import hmm
import pickle

from text_analysis import TextAnalysis

# A livedoor-news article to be tokenised by MeCab morphological analysis.
input_dir = "./datasets/livedoor/dokujo-tsushin/"
input_data = input_dir + "dokujo-tsushin-4778030.txt"

X = TextAnalysis.mecab_analysis(input_data)

# verbose=True lets you watch each EM iteration.
# model = hmm.MultinomialHMM(n_components=10, n_iter=1000, verbose=True)
model = hmm.MultinomialHMM(n_components=10, n_iter=1000)

model.fit(X)

L, Z = model.decode(X)
# print(model.transmat_)  # print the learned transition probabilities
# print(model.monitor_)   # the history array holds the last two log-likelihoods
sample = model.sample(n_samples=100)

# Load the word-to-id dictionary.
with open('./datasets/livedoor/livedoor_dict.pkl', 'rb') as f:
    livedoor_dict = pickle.load(f)

# Generate text by sampling word ids from the fitted model.
sample_id = sample[0].flatten()
sample_text = ""
for id in sample_id:
    for key in livedoor_dict:
        if id == livedoor_dict[key]:
示例#24
0
import numpy as np
import hmmlearn.hmm as hmm
import math

status = ['吃', '睡']  # hidden-state labels ("eating", "sleeping")
observation = ['哭', '没精神', '找妈妈']  # observation labels ("crying", "listless", "looking for mom")
n_status = len(status)
n_observation = len(observation)
start_probability = np.array([0.3, 0.7])  # initial state distribution
# State transition probability matrix
transition_probability = np.array([[0.1, 0.9], [0.8, 0.2]])
# Observation (emission) probability matrix
# (note: the double underscore in the name below is kept as-is)
emission__probability = np.array([[0.7, 0.1, 0.2], [0.3, 0.5, 0.2]])
# Build the HMM with fixed (not fitted) parameters
model = hmm.MultinomialHMM(n_components=n_status)
model.startprob_ = start_probability
model.transmat_ = transition_probability
model.emissionprob_ = emission__probability
# Observation sequence, transposed to the column vector hmmlearn expects
Actions = np.array([[0, 1, 2]])
Action_model = Actions.T

# score() returns the log-likelihood; exponentiate to get the probability
score = model.score(Action_model, lengths=None)
Action = ','.join(map(lambda x: observation[x], Actions[0]))
print("\t\"", Action, "\"的概率为:", end='')
print('\t', math.exp(score) * 100, '%')

# Posterior state probabilities for every observation
predict_proba = model.predict_proba(Action_model, lengths=None)
# Viterbi estimate of the most likely state sequence (continues below)
示例#25
0
def main():
    """Train a discrete-observation HMM on symbol sequences read from
    `train_file`, persisting the fitted LabelEncoder and HMM via pickle.

    Reads the training file in 10 parallel chunks, label-encodes the
    observed symbols, then fits a 100-state MultinomialHMM.
    Relies on module-level names: train_file, chunk_lines, t (timer),
    read_distributed, MODEL_PATH.
    """
    le = preprocessing.LabelEncoder()
    x = np.array([])      # concatenated observation symbols from all workers
    x_len = np.array([])  # per-sequence lengths matching x

    line_cache = linecache.getlines(train_file)
    count = len(line_cache)
    number = int(count / chunk_lines)  # number of full chunks (informational)
    print(count)
    print(number)

    t()
    # Fan the line chunks out to 10 worker processes.
    # NOTE(review): apply_async unpacks the list slice as positional
    # arguments, i.e. read_distributed(*lines) — confirm that matches
    # read_distributed's signature.
    pool = mp.Pool(processes=10)
    jobs = []
    for i in range(10):
        jobs.append(
            pool.apply_async(
                read_distributed,
                line_cache[i * chunk_lines:i * chunk_lines + chunk_lines]))
    # jobs.append(pool.apply_async(read_distributed, line_cache[number * chunk_lines : count]))
    for job in jobs:
        x = np.append(x, job.get()[0])
        x_len = np.append(x_len, job.get()[1])
    pool.close()

    # Collect the distinct symbols (first-seen order) for the encoder.
    labels = []
    for number in x:
        if number in labels:
            pass
        else:
            labels.append(number)

    # print(labels)
    le.fit(labels)

    print('**************************************')
    t()
    print(le.classes_)
    # Persist the fitted LabelEncoder so inference can reuse the same mapping.
    model_le_name = MODEL_PATH + 'le.pkl'
    with open(model_le_name, 'wb') as model_file:
        pickle.dump(le, model_file)
    print("le saved")

    x = x[:, np.newaxis]

    # Encode symbols to integer ids; hmmlearn expects an int column vector.
    new_x = le.transform(x)
    X = np.array(new_x).astype('int32')
    # X = X[:, np.newaxis]
    X = X.reshape(-1, 1)
    # print(X.shape)
    # print(X.dtype)
    #
    print(X)
    print(len(X))
    #
    # print(x_len.shape)
    # print(x_len.dtype)
    X_len = np.array(x_len).astype('int32')

    # print(X_len.shape)
    # print(X_len.dtype)
    print(sum(X_len))  # should equal len(X): lengths must cover every row

    number_of_status = 100
    print('¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥¥')
    t()
    print('Start Training')
    model = hmm.MultinomialHMM(n_components=number_of_status,
                               n_iter=10000,
                               tol=0.01,
                               verbose=True)
    model.fit(X, X_len)
    # print(model.score(x,x_len))
    print('**************************************')
    print(model.transmat_)
    # Persist the fitted HMM alongside the encoder.
    model_name = MODEL_PATH + 'hmm.pkl'
    with open(model_name, 'wb') as model_file:
        pickle.dump(model, model_file)
    print("hmm saved")
示例#26
0
if __name__ == "__main__":
    n_states = 4
    # One LBG-VQ-coded training file per digit 0-9.
    data_filename = [
        'data/LBG_VQ/train0.txt', 'data/LBG_VQ/train1.txt',
        'data/LBG_VQ/train2.txt', 'data/LBG_VQ/train3.txt',
        'data/LBG_VQ/train4.txt', 'data/LBG_VQ/train5.txt',
        'data/LBG_VQ/train6.txt', 'data/LBG_VQ/train7.txt',
        'data/LBG_VQ/train8.txt', 'data/LBG_VQ/train9.txt'
    ]
    best_models_set = []
    for i in range(10):
        # Multi-dimensional observation sequence for digit i
        scores_set = []
        onemodel_parms_set = []
        model = hmm.MultinomialHMM(n_components=n_states, n_iter=500, tol=0.01)
        O = train_data_create(data_filename[i])  # observation sequence
        print("##########", i, i, i, "##########")
        print(O.shape)
        for j in range(10):
            # Train ten times and keep the highest-scoring model to
            # reduce the impact of converging to a local maximum.
            model.fit(O)
            model_parms = {}
            model_parms['pi'] = model.startprob_
            model_parms['A'] = model.transmat_
            model_parms['B'] = model.emissionprob_
            onemodel_parms_set.append(model_parms)
            scores_set.append(model.score(O))

        max_index = scores_set.index(max(scores_set))
        print("数字%d的最佳模型索引%d" % (i, max_index))
# Transition matrix over the hidden states (healthy, sick).
transm = np.array([[0.8, 0.2], [0.4, 0.6]])
# Human-readable emission probabilities; emism below holds the same
# values as a matrix in the order (no-symptoms, cold, dizzy).
emission_probability = {
    'healthy': {
        'no-symptoms': 0.6,
        'cold': 0.3,
        'dizzy': 0.1
    },
    'sick': {
        'no-symptoms': 0.1,
        'cold': 0.3,
        'dizzy': 0.6
    },
}
emism = np.array([[0.6, 0.3, 0.1], [0.1, 0.3, 0.6]])

hmm_model = hmm.MultinomialHMM(n_components=len(states), algorithm='viterbi')
hmm_model.startprob_ = startm
hmm_model.transmat_ = transm
hmm_model.emissionprob_ = emism

#Evaluation: given a model, what is the probability of sequence y?
#Note that the score method produces the log likelihood, so to get prob, exponentiate
# Each y is a length-1 observation sequence (column vector of one symbol).
y = np.array([[0]])
print('Probability of first observation in a sequence being', observations[0],
      'regardless of state is', math.exp(hmm_model.score(y)))
y = np.array([[1]])
print('Probability of first observation in a sequence being', observations[1],
      'regardless of state is', math.exp(hmm_model.score(y)))
y = np.array([[2]])
print('Probability of first observation in a sequence being', observations[2],
      'regardless of state is', math.exp(hmm_model.score(y)))
    [0.612, 0, 0, 0, 0.081, 0, 0.307],  #5
    [0.697, 0, 0, 0, 0.089, 0, 0.214]
])  #6

# Emission states: healthy(1), hospitalization(2), death(3).
# NOTE(review): rows 1-5 do not sum to 1 (rows 1-4 are all zero) —
# confirm whether sampling is expected to normalise or skip these states.
emission_prob = np.array([
    [0, 0, 0],  #1
    [0, 0, 0],  #2
    [0, 0, 0],  #3
    [0, 0, 0],  #4
    [0, 0, 0.14],  #5
    [0.279, 0.325, 0.091],  #6
    [0.35, 0.357, 0.089]
])  #7
#create an instance of the model
model = hmm.MultinomialHMM(n_components=7)
model.startprob_ = startprob
model.transmat_ = transmat
model.emissionprob_ = emission_prob
#draw samples given the startprob, transmat and emissionprob
X, Z = model.sample(50)
X_array = np.array(X)
print('sampled states:', list(Z))
print()
print('sampled outcomes:', list(X_array.flatten()))

#plot the emission data
plt.plot(X, '.-', label='observations', ms=6, mfc='orange', alpha=0.7)
#human-readable labels for the emission indices
emit = ['healthy', 'hospitilization', 'dead']
plt.xlabel('Iteration')
示例#29
0
# Three hidden states (medal tiers) and four observable gems.
states = ["Gold", "Silver", "Bronze"]
n_states = len(states)

observations = ["Ruby", "Pearl", "Coral", "Sapphire"]
n_observations = len(observations)

# HMM parameters: pi (initial), A (transition), B (emission).
pi_vec = np.array([0.3, 0.3, 0.4])

a_mat = np.array([
    [0.1, 0.5, 0.4],
    [0.4, 0.2, 0.4],
    [0.5, 0.3, 0.2],
])

b_mat = np.array([
    [0.4, 0.2, 0.2, 0.2],
    [0.25, 0.25, 0.25, 0.25],
    [0.33, 0.33, 0.33, 0],
])

model = hmm.MultinomialHMM(n_components=3)

# Assign pi / A / B directly instead of fitting them from data.
model.startprob_ = pi_vec
model.transmat_ = a_mat
model.emissionprob_ = b_mat

# Two observation sequences to evaluate and decode.
X1 = [0, 1, 2]
X2 = [0, 0, 0]

if __name__ == '__main__':
    calculateLikelyHood(model, X1)
    optimizeStates(model, X1)
    calculateLikelyHood(model, X2)
    optimizeStates(model, X2)
示例#30
0
from hmmlearn import hmm
import numpy as np
from hmm_classifier import HMM_classifier

# Synthetic data: 300 sequences, each 10 timesteps of 2 features,
# with integer labels in [0, 10).
x = np.random.randint(0, 10, size=(300, 10, 2))
y = np.random.randint(0, 10, size=(300))

# Classifier wrapping a MultinomialHMM — presumably one HMM per label;
# verify against HMM_classifier's implementation.
model = HMM_classifier(hmm.MultinomialHMM())
model.fit(x, y)

# Predict probability per label
pred = model.predict_proba(np.random.randint(0, 10, size=(10, 2)))

# Get label with the most high probability
pred = model.predict(np.random.randint(0, 10, size=(100, 2)))