Пример #1
0
    def tagger(self, text_list):
        """Language-tag every word of ``text_list``.

        Runs the HMM to get per-word language tags, overrides them with
        'Punct' / 'Num' for punctuation and digits, consults the English and
        Spanish NE classifiers in 1000-word batches, and records the HMM
        transition and language-model probabilities for Eng/Spn words.

        Returns a list of 7-tuples
        (word, lang, NE, engProb, spnProb, hmmProb, totalProb) with the
        probability fields stringified ("N/A" for non-Eng/Spn tokens).
        """
        hmm = HiddenMarkovModel(text_list, self.tags, self.transitions,
                                self.cslm)
        hmmtags = hmm.generateTags()  # generate list of hmm tags
        words = hmm.words  # generate list of words
        taggedTokens = []
        prevLang = "Eng"  # transition context used for the very first word
        engTags = []
        spnTags = []
        engTag = ""
        spanTag = ""
        # Matches a single non-word, non-space character (punctuation).
        token = re.compile(ur'[^\w\s]', re.UNICODE)
        print "Tagging {} words".format(len(words))
        for k, word in enumerate(words):
            # check if punctuation else use hmmtag
            lang = 'Punct' if re.match(
                token, word) and not word[-1].isalpha() else hmmtags[k]
            lang = 'Num' if word.isdigit() else lang
            # check if word is NE
            if lang != "Punct":
                # NE-classify in 1000-word batches; `index` is this word's
                # offset inside the current batch.
                # NOTE(review): if the word at a batch boundary
                # (k % 1000 == 0) is tagged 'Punct', the batch is never
                # refreshed and stale classifier output is indexed for the
                # following words -- confirm this is intended.
                index = k % 1000
                if index == 0:
                    engTags = self.engClassifier.tag(words[k:k + 1000])
                    spnTags = self.spanClassifier.tag(words[k:k + 1000])
                engTag = engTags[index][1]
                spanTag = spnTags[index][1]
            else:
                engTag = "O"
                spanTag = "O"

            # mark as NE if either classifier identifies it
            if engTag != 'O' or spanTag != 'O':
                NE = "{}/{}".format(engTag, spanTag)
            else:
                NE = "O"
            # record probabilities
            if lang in ("Eng", "Spn"):
                hmmProb = round(hmm.transitions[prevLang][lang], 2)
                engProb = round(self.cslm.prob("Eng", word), 2)
                spnProb = round(self.cslm.prob("Spn", word), 2)
                totalProb = (hmmProb +
                             engProb) if lang == "Eng" else (hmmProb + spnProb)
                prevLang = lang  # remember language for the next transition lookup
            else:
                hmmProb = "N/A"
                engProb = "N/A"
                spnProb = "N/A"
                totalProb = "N/A"

            taggedTokens.append((word, lang, NE, str(engProb), str(spnProb),
                                 str(hmmProb), str(totalProb)))
            #taggedTokens.append((word, lang, NE))
            #print word, lang, NE
        return taggedTokens
    def tagger(self, text_list):
        """Language-tag every word of ``text_list``.

        Same pipeline as above: HMM language tags, 'Punct'/'Num' overrides,
        batched NE classification, and per-word probability bookkeeping.
        Returns (word, lang, NE, engProb, spnProb, hmmProb, totalProb)
        tuples with probabilities stringified ("N/A" when not Eng/Spn).
        """
        hmm = HiddenMarkovModel(text_list, self.tags, self.transitions, self.cslm)
        hmmtags = hmm.generateTags() # generate list of hmm tags
        words = hmm.words # generate list of words
        taggedTokens = []
        prevLang = "Eng"  # transition context for the first word
        engTags = []
        spnTags = []
        engTag = ""
        spanTag = ""
        # Single non-word, non-space character (punctuation) matcher.
        token = re.compile(ur'[^\w\s]', re.UNICODE)
        print "Tagging {} words".format(len(words))
        for k, word in enumerate(words):
            # check if punctuation else use hmmtag
            lang = 'Punct' if re.match(token, word) and not word[-1].isalpha() else hmmtags[k]
            lang = 'Num' if word.isdigit() else lang
            # check if word is NE
            if lang != "Punct":
              # NE-classify in 1000-word batches; `index` is the offset
              # inside the current batch.
              index = k % 1000
              if index == 0:
                engTags = self.engClassifier.tag(words[k:k+1000])
                spnTags = self.spanClassifier.tag(words[k:k+1000])
              engTag = engTags[index][1]
              spanTag = spnTags[index][1]
            else:
              engTag = "O"
              spanTag = "O"

            # mark as NE if either classifier identifies it
            if engTag != 'O' or spanTag != 'O':
                NE = "{}/{}".format(engTag, spanTag)
            else:
                NE = "O"
            # record probabilities
            if lang in ("Eng", "Spn"):
              hmmProb = round(hmm.transitions[prevLang][lang], 2)
              engProb = round(self.cslm.prob("Eng", word), 2)
              spnProb = round(self.cslm.prob("Spn", word), 2)
              totalProb = (hmmProb + engProb) if lang == "Eng" else (hmmProb + spnProb)
              prevLang = lang  # remember language for the next transition lookup
            else:
              hmmProb = "N/A"
              engProb = "N/A"
              spnProb = "N/A"
              totalProb = "N/A"

            taggedTokens.append((word, lang, NE, str(engProb), str(spnProb), str(hmmProb), str(totalProb)))
            #taggedTokens.append((word, lang, NE))
            #print word, lang, NE
        return taggedTokens
Пример #3
0
def initML_pw():
    """Load pickled gesture observations and build one HMM per gesture.

    Reads ``training_sourse/<NAME>.pickle`` for each gesture name, keeps
    the first 100 X/Y observation points, and constructs a
    ``HiddenMarkovModel`` for each.

    Returns:
        list: one ``HiddenMarkovModel`` per gesture name.
    """
    # Gesture names whose training data will be loaded.
    training_files = [
        'UPDOWN', 'DOWNUP', 'DOWNRIGHT', 'DOWNLEFT', 'UPRIGHT', 'UPLEFT'
    ]
    all_models = []
    for gesture in training_files:  # renamed from `file` (shadowed builtin)
        file_name = 'training_sourse/' + gesture + '.pickle'
        with open(file_name, 'rb') as f:
            obs = pickle.load(f)
        # Keep only the first 100 observation points per axis.
        temp = {'X': [obs['X'][j] for j in range(100)],
                'Y': [obs['Y'][j] for j in range(100)]}
        # CIRCLE gestures use 4 hidden states, everything else 2.
        # NOTE(review): 'CIRCLE' never appears in training_files, so the
        # 4-state branch is dead here -- kept for parity with initML_main.
        if gesture == 'CIRCLE':
            all_models.append(HiddenMarkovModel(4, gesture, temp))
        else:
            all_models.append(HiddenMarkovModel(2, gesture, temp))
    return all_models
Пример #4
0
 def angList(self, text_list):
     """Collect maximal runs of tokens the HMM flags as anglicisms.

     Consecutive tokens whose ``hmm.ang`` tag is "Yes" are joined with
     spaces into a single string; each completed run is appended to the
     result list.

     Bug fix: the original never flushed a run that extended to the end
     of the input, silently dropping the final anglicism span.
     """
     hmm = HiddenMarkovModel(text_list, self.cslm)
     runs = []
     current = ""
     for token, tag in zip(text_list, hmm.ang):
         if tag == "Yes":
             current = " ".join([current, token])
         elif current != "":
             runs.append(current.strip())
             current = ""
     # Flush a trailing run that was still open when the input ended.
     if current != "":
         runs.append(current.strip())
     return runs
Пример #5
0
def initML_main(dict):
    """Build an HMM for every gesture enabled in the given mapping.

    Args:
        dict: mapping of UI items (with a ``.text()`` method) to counts;
            gestures with a truthy positive count are selected.
            NOTE(review): the parameter name shadows the builtin ``dict``;
            kept unchanged for backward compatibility with callers.

    Returns:
        tuple: (the input mapping unchanged, list of HiddenMarkovModel).
    """
    # Collect the upper-cased label of every gesture with a positive count.
    ges_list = []
    for key, value in dict.items():
        if value and value > 0:
            ges_list.append(key.text().upper())

    all_models = []
    for gesture in ges_list:  # renamed from `file` (shadowed builtin)
        file_name = 'training_sourse/' + gesture + '.pickle'
        with open(file_name, 'rb') as f:
            obs = pickle.load(f)
        # Keep only the first 100 observation points per axis.
        temp = {'X': [obs['X'][j] for j in range(100)],
                'Y': [obs['Y'][j] for j in range(100)]}
        # CIRCLE gestures use 4 hidden states, everything else 2.
        if gesture == 'CIRCLE':
            all_models.append(HiddenMarkovModel(4, gesture, temp))
        else:
            all_models.append(HiddenMarkovModel(2, gesture, temp))
    return dict, all_models
    def compute_probs(user_df):
        # NOTE(review): this function appears to have been corrupted during
        # scraping -- the for-loop header that should iterate over
        # `timeGrouping` (binding `seq` from each week's "feature" column)
        # has been mangled into the comment below, and `model`,
        # `trainingPeriod`, `timesTrained`, `inert`, `logProbScores`,
        # `matrices` and `mm` are never defined in the visible code.  As
        # written this block does not parse; recover the original source
        # before relying on it.
        dayGrouping = pd.Grouper(key="date", freq="1D")
        weekGrouping = pd.Grouper(key="date", freq="1W")
        timeGrouping = user_df.groupby(weekGrouping)

        # print("Starting on user: "******"feature"].values

            if len(seq) < 1:
                # If there is no activity for this week
                logProbScores.append(0)
                continue

            if timesTrained > trainingPeriod:
                logProb = model.sequence_log_probability(seq)
                logProbScores.append(-logProb)

                #Train the model on the sequence we have just seen
            model.learn(seq,
                        max_iters=20,
                        threshold=0.01,
                        restart_threshold=0.1,
                        max_restarts=5,
                        inertia=inert)
            matrices.append(
                mm(model.transitions, model.emissions, model.starts))
            timesTrained += 1

        return (logProbScores, matrices)
Пример #7
0
 def tag(self, text_list):
     """Annotate each token with HMM-derived attributes.

     Pairs the input tokens with the model's parallel per-token lists,
     yielding (token, lemma, lang, NE, ang, engProb, spnProb) tuples.
     """
     model = HiddenMarkovModel(text_list, self.cslm)
     return zip(text_list, model.lemmas, model.lang, model.NE,
                model.ang, model.engProbs, model.spnProbs)
Пример #8
0
# Ari Chadda
# PA6 CS76 - 11/10/20

from HiddenMarkovModel import HiddenMarkovModel
from Maze import Maze

if __name__ == "__main__":
    # Pick the maze to solve; maze1 is available as an alternative.
    # maze = Maze("maze1.maz")
    maze = Maze("maze2.maz")

    # Build the HMM-based solver and run the particle-filtering algorithm.
    solver = HiddenMarkovModel(maze)
    solver.particle_filtering()
Пример #9
0
# Bug fix: `os` and `json` are used below but were never imported,
# so this script raised NameError before doing any work.
import json
import os

import numpy as np
import matplotlib.pyplot as plt
from HiddenMarkovModel import HiddenMarkovModel

# Resolve the labeled simulation data file relative to this script.
PROJECT_ROOT = os.path.abspath(os.path.dirname(__file__))
DATA_FILE = os.path.join(PROJECT_ROOT,
                         "../simulation/data/20200304_192852.json.labeled")

with open(DATA_FILE, 'r') as f:
    data = json.load(f)  # parse directly from the file handle

# Map steering labels to HMM state indices.
states = {'left': 0, 'none': 1, 'right': 2}

model = HiddenMarkovModel(states)
model.initialize()

# Exploratory pairwise feature plotting, kept for reference:
# fig, axs = plt.subplots(7, 7, sharex='col', sharey='row')
# for k in data.keys():
#     key_list = [i for i in list(data[k].keys())
#                 if i != 'speed' and i != 'label']
#     for idx, e in enumerate(key_list):
#         for idxx, ee in enumerate(key_list):
#             axs[idx][idxx].plot(data[k][ee], data[k][e], '.')
#             axs[idx][idxx].set_xlabel(ee)
#             axs[idx][idxx].set_ylabel(e)
#     fig.suptitle(k)
#
# plt.show()
Пример #10
0
# Replace infrequent words in the training corpus with pseudo-word classes
# (capitalized / -ing suffix / numbers) so the HMM can handle rare tokens.
training_infrequent_words = dataPreProcessor.identify_infrequent_words()
trainSet = dataPreProcessor.tag_capital_words(training_infrequent_words,
                                              trainSet)
trainSet = dataPreProcessor.tag_UNI_ing_words(training_infrequent_words,
                                              trainSet)
trainSet = dataPreProcessor.tag_numbers(training_infrequent_words, trainSet)

# Apply the same rare-word normalization to the test corpus.
testing_infrequent_words = dataPreProcessor.identify_infrequent_words_in_testing_corpus(
)
testSet = dataPreProcessor.tag_capital_words(testing_infrequent_words, testSet)
testSet = dataPreProcessor.tag_UNI_ing_words(testing_infrequent_words, testSet)
testSet = dataPreProcessor.tag_numbers(testing_infrequent_words, testSet)

# Create an HMM instance and estimate its parameters.
# NOTE(review): the original comment said "passed the training set", but
# testSet (not trainSet) is passed here -- confirm whether the model
# should be built from trainSet instead.
hiddenMarkovModel = HiddenMarkovModel(testSet)
hiddenMarkovModel.calculate_transition_prob_for_POS_tags()
hiddenMarkovModel.calculate_emission_prob()

# Flatten the test sentences and extract the gold tag sequence.
unified_test_set = [tup for sent in testSet for tup in sent]
test_set_tags = [t for (_, t) in unified_test_set]

# Viterbi-decode each test sentence, skipping very long ones (>= 100 tokens).
viterbi = Viterbi(hiddenMarkovModel)
viterbi_tags = []
for test in testSet:
    if len(test) < 100:
        test_observations = [w for (w, _) in test]
        viterbi_tags += viterbi.tag_words(test_observations)

check = [
    v_tag for v_tag, t_tag in zip(viterbi_tags, test_set_tags)
Пример #11
0
# Build the standard input matrices for the model.
# Bug fix: DataFrame.as_matrix() was removed in pandas 1.0; .values
# yields the same NumPy array on all supported versions.
emi_mat = emission_matrix.values
trans_mat = trans_matrix_reverse.values


# In[20]:

# Initial state probabilities; only the best-performing choice is shown
# here, i.e. a uniform distribution over all states.
allNumber = len(trans_mat)
p0 = [1.0 / allNumber for i in range(allNumber)]


# In[22]:

# Define the model.
model = HiddenMarkovModel(trans_mat, emi_mat, p0)


# In[23]:

# Run Viterbi decoding over the full observation index sequence.
states_seq, state_prob = model.run_viterbi(
    [i for i in range(len(emission_matrix))], summary=True)


# In[56]:

# Map decoded state indices back to grid cells and their IDs.
grid = emission_matrix.columns[states_seq]
predict = Grid_ID.loc[grid]  # .values
Пример #12
0
import numpy as np
from HiddenMarkovModel import HiddenMarkovModel

# Bug fix: the banner strings below had typos ("Tnitialization",
# "Porbability", "Foward"); corrected to proper English.
print("**********Test Initialization **********")
# states = {0: 'sunny', 1: 'rain', 2: 'cloudy'}
states = {0: 'default', 1: 'other'}
hmm = HiddenMarkovModel(states, method='gmm', data_dim=2, gmm_k=1)
print(hmm)

print("**********Test Emission Probability **********")
# Alternative observation sequences kept for experimentation:
# obs = [[0, 0], [0, 0], [10, 10], [10, 10], [
#     0, 0], [10, 10], [10, 10], [0, 0], [0, 0]]

# obs= [[-1, -1], [-2, 2], [3, 3], [-1, -1], [0, 0], [-1, 1], [3, 3], [-2, 2], [1, 0]]

obs = [[1, 1], [1, 1], [1.1, 1], [0.9, 1], [1, 1], [1, 1], [1.2, 1], [0.8, 1],
       [1, 1]]
# np.random.seed(0)
# obs = np.ones((100000, 2))+np.random.rand(100000, 2)
print(obs)

# Initialize the GMM emission parameters from the observations and
# inspect them.
hmm._init_param(np.array(obs))
print(hmm.weights)
print(hmm.means)
print(hmm.sigmas)

emitlogprob = hmm._log_emission(np.array(obs))
print(emitlogprob)

print("**********Test Forward Probability **********")
logA = np.log(hmm.A)
Пример #13
0
    #trans.loc[key, dic_concat[key]] = e
    #trans.loc[dic[key],key] = e
    trans.loc[key, key] = 1

# In[44]:

# Build the emission and transition input matrices.
emi_mat = emission_matrix.values
tran_mat = trans.values
# Initial state probabilities: uniform over all states.
allNumber = len(tran_mat)
p0 = [1.0 / allNumber for i in range(allNumber)]

# In[45]:

model = HiddenMarkovModel(tran_mat, emi_mat, p0)

# In[46]:

# Run Viterbi decoding over the full observation index sequence.
states_seq, state_probs = model.run_viterbi([i for i in range(len(emi_mat))],
                                            summary=True)

# In[47]:

# Bare expression: displays the decoded state sequence in the notebook.
states_seq

# In[ ]:

main = states_seq[0]
ls = [main]
for i in range(len(states_seq)):