def setUp(self):
    """Prepare the fixtures shared by the tests.

    Sets ``self.inputY`` / ``self.Y`` (a single State description) and
    ``self.inputHMMData`` (one HMM description per entry of
    ``self.mathTypes``).
    """
    state_spec = {
        "__name__": "State",
        "name": "name",
        "startprob": 1.0,
        "emission": [("A", 1.0), (["C", "D"], 0.5)],
        "endprob": 0.5,
    }
    self.inputY = state_spec
    # Turn the list-valued emission key into a tuple, in place.
    symbols, weight = state_spec["emission"][1]
    state_spec["emission"][1] = (tuple(symbols), weight)
    # self.Y is the very same dict object as self.inputY (no copy is made).
    self.Y = state_spec

    def make_state(math_type, name, startprob, emission):
        # Construct a State for math_type and load a description that always
        # has endprob 1.0.
        node = State(math_type)
        node.load({
            "__name__": "State",
            "name": name,
            "startprob": startprob,
            "endprob": 1.0,
            "emission": emission,
        })
        return node

    self.inputHMMData = {}
    for mathType in self.mathTypes:
        edges = [
            ("Init", "White", 0.3),
            ("Init", "Black", 0.7),
            ("White", "White", 1.0),
            ("Black", "Black", 1.0),
        ]
        transitions = [
            {"from": src, "to": dst, "prob": mathType(weight)}
            for (src, dst, weight) in edges
        ]
        init_state = make_state(mathType, "Init", 1.0, [("0", 0.5), ("1", 0.5)])
        white_state = make_state(mathType, "White", 0.0, [("0", 0.05), ("1", 0.95)])
        black_state = make_state(mathType, "Black", 0.0, [("0", 0.9), ("1", 0.1)])
        # The states are stored in the order White, Init, Black.
        self.inputHMMData[mathType] = {
            "__name__": "HMM",
            "states": [white_state, init_state, black_state],
            "transitions": transitions,
        }
def test_state_loading(self):
    """Load the fixture into a default State and compare its toJSON() dump
    with ``self.Y``."""
    loaded = State()
    loaded.load(self.inputY)
    dumped = loaded.toJSON()
    expected = self.Y
    failure = "".join([
        "Loading and dumping to JSON does not ",
        " work: ",
        str(dumped),
        " != ",
        str(expected),
    ])
    self.assertDictEqual(dumped, expected, failure)
def test_state(self):
    """Exercise a State built with every math type in ``self.mathTypes``.

    Covers the duration generator, emission lookup, state ID accessors,
    forward/reverse transitions (including remapping and clearing) and the
    start/end probabilities.  The failure messages say "HMM." although the
    object under test is a State.
    """
    for numType in self.mathTypes:
        node = State(numType)
        node.load(self.inputY)

        # Durations
        got = list(node.durationGenerator())
        want = [(1, numType(1.0))]
        self.assertEqual(
            got,
            want,
            "HMM.durationGenerator() does not work: "
            + str(got) + " != " + str(want)
        )

        # Emission
        want = numType(1.0)
        got = node.emission("AC", 0)
        self.assertAlmostEqual(
            got,
            want,
            delta=1e-7,
            msg='HMM.emission("AC", 0) does not work: '
            + str(got) + " != " + str(want)
        )

        # State ID round trip
        for state_id in range(4):
            node.setStateID(state_id)
            self.assertEqual(
                node.getStateID(),
                state_id,
                "HMM.set/getStateID({0}) is broken.".format(state_id)
            )

        # Transitions and ID remapping
        expected_edges = [(1, 1.0), (2, 0.4), (3, 0.2), (4, 0.6)]
        id_map = {1: 2, 2: 3, 3: 4, 4: 5}
        for (target, prob) in expected_edges:
            node.addTransition(target, prob)
            node.addReverseTransition(target, prob)
        self.assertEqual(
            node.followingIDs(),
            expected_edges,
            "HMM.?transitions are not working."
        )
        self.assertEqual(
            node.previousIDs(),
            expected_edges,
            "HMM.?reverse transitions are not working."
        )

        node.remapIDs(id_map)
        expected_edges = [
            (id_map[target], prob) for (target, prob) in expected_edges
        ]
        self.assertEqual(
            node.followingIDs(),
            expected_edges,
            "HMM.remapIDs() is not working."
        )
        self.assertEqual(
            node.previousIDs(),
            expected_edges,
            "HMM.remapIDs() is not working."
        )

        node.clearTransitions()
        leftover = node.followingIDs()
        leftover.extend(node.previousIDs())
        self.assertEqual(
            leftover,
            [],
            "HMM.clearTransitions() is not working."
        )

        # Start and end probability
        self.assertAlmostEqual(
            node.getStartProbability(),
            1.0,
            delta=1e-7,
            msg="HMM.getStartProbability is broken."
        )
        self.assertAlmostEqual(
            node.getEndProbability(),
            0.5,
            delta=1e-7,
            msg="HMM.getEndProbability is broken."
        )
def createProfileHMMv1(mathType, consensus, time, backgroundProb, trans):
    """Build a cyclic profile GeneralizedHMM for ``consensus`` (no End state).

    For every consensus position ``k`` four states are created:

    * ``m<k>``  -- State emitting ``JCModel(consensus[k], time, "ACGT")``
      (presumably a Jukes-Cantor substitution model -- confirm against
      JCModel),
    * ``i<k>``  -- State emitting ``backgroundProb``,
    * ``1d<k>`` and ``2d<k>`` -- GeneralizedStates with the empty emission
      ``[("", 1.0)]`` and duration ``[(0, 1.0)]``.

    One extra insert state ``i<len(consensus)>`` and a zero-duration ``Init``
    state are appended.  The last match and trailing insert states lead back
    to ``Init``; the last ``1d``/``2d`` states lead back to position 0.

    Args:
        mathType: number type handed to every State / GeneralizedState /
            GeneralizedHMM constructor.
        consensus: indexable sequence of consensus symbols.
            NOTE(review): an empty consensus is not validated here; the
            wrap-around transitions would then name states that were never
            created.
        time: passed through to ``JCModel``.
        backgroundProb: emission description shared by all insert states.
        trans: mapping of transition probabilities; reads the keys
            ``MM MI MD DD DM DI II IM ID _M _I _D``.

    Returns:
        The loaded GeneralizedHMM after ``reorderStatesTopologically()``.
    """
    def edge(src, dst, prob):
        # One transition record in the format expected by GeneralizedHMM.load.
        return {"from": src, "to": dst, "prob": prob}

    def emitting_state(name, emission):
        # Match/insert state: never a start state, may always end.
        state = State(mathType)
        state.load({
            "__name__": "State",
            "name": name,
            "startprob": 0.0,
            "emission": emission,
            "endprob": 1.0,
        })
        return state

    def silent_state(name, startprob, endprob):
        # Zero-duration state that emits only the empty string.
        state = GeneralizedState(mathType)
        state.load({
            "__name__": "GeneralizedState",
            "name": name,
            "startprob": startprob,
            "emission": [("", 1.0)],
            "endprob": endprob,
            "durations": [(0, 1.0)],
        })
        return state

    length = len(consensus)
    states = []
    transitions = []
    for i, char in enumerate(consensus):
        cur = str(i)
        states.extend([
            emitting_state("m" + cur, JCModel(char, time, "ACGT")),
            emitting_state("i" + cur, backgroundProb),
            silent_state("1d" + cur, 0.0, 0.0),
            silent_state("2d" + cur, 0.0, 0.0),
        ])
        if i < length - 1:
            # Edges from position i to position i + 1.
            nxt = str(i + 1)
            transitions.extend([
                edge("m" + cur, "m" + nxt, trans['MM']),
                edge("m" + cur, "i" + nxt, trans['MI']),
                edge("m" + cur, "1d" + nxt, trans['MD']),
                edge("1d" + cur, "1d" + nxt, trans['DD']),
                edge("1d" + cur, "m" + nxt, trans['DM']),
                edge("1d" + cur, "i" + nxt, trans['DI']),
                edge("2d" + cur, "2d" + nxt, trans['DD']),
                edge("2d" + cur, "m" + nxt, trans['DM']),
                edge("2d" + cur, "i" + nxt, trans['DI']),
            ])
        # Edges leaving the insert state of position i (same position).
        transitions.extend([
            edge("i" + cur, "i" + cur, trans['II']),
            edge("i" + cur, "m" + cur, trans['IM']),
            edge("i" + cur, "1d" + cur, trans['ID']),
        ])

    last = str(length - 1)
    tail = str(length)
    transitions.extend([
        edge("Init", "m0", trans['_M']),
        edge("Init", "i0", trans['_I']),
        edge("Init", "1d0", trans['_D']),
        # Wrap-around from the last delete state of the first chain; deleting
        # again continues in the second delete chain.
        edge("1d" + last, "m0", trans['_M']),
        edge("1d" + last, "i0", trans['_I']),
        edge("1d" + last, "2d0", trans['_D']),
        edge("m" + last, "Init", 1.0 - trans['MI']),
        edge("m" + last, "i" + tail, trans['MI']),
        edge("i" + tail, "i" + tail, trans['II']),
        edge("i" + tail, "Init", 1.0 - trans['II']),
        # The second delete chain has no further delete edge, so the two
        # remaining probabilities are rescaled to sum to one.
        edge("2d" + last, "m0", trans['_M'] / (trans['_M'] + trans['_I'])),
        edge("2d" + last, "i0", trans['_I'] / (trans['_M'] + trans['_I'])),
    ])

    states.append(emitting_state("i" + tail, backgroundProb))
    states.append(silent_state("Init", 1.0, 1.0))

    hmm = GeneralizedHMM(mathType)
    hmm.load({
        "__name__": "GeneralizedHMM",
        "states": states,
        "transitions": transitions,
    })
    hmm.reorderStatesTopologically()
    return hmm
def createProfileHMMv2(mathType, consensus, time, backgroundProb, trans):
    """Build a cyclic profile GeneralizedHMM for ``consensus`` with an End state.

    For every consensus position ``k`` four states are created:

    * ``m<k>``  -- State emitting ``JCModel(consensus[k], time, "ACGT")``
      (presumably a Jukes-Cantor substitution model -- confirm against
      JCModel),
    * ``i<k>``  -- State emitting ``backgroundProb``,
    * ``1d<k>`` and ``2d<k>`` -- GeneralizedStates with the empty emission
      ``[("", 1.0)]`` and duration ``[(0, 1.0)]``.

    A trailing insert state ``i<len(consensus)>`` plus zero-duration ``Init``
    and ``End`` states are appended; only ``End`` has a non-zero end
    probability.  The last state of the second delete chain
    (``2d<len-1>``) and every transition touching it are dropped before the
    model is loaded.  After loading, each state's transitions are normalized
    and the states are reordered topologically.

    Args:
        mathType: number type handed to every State / GeneralizedState /
            GeneralizedHMM constructor.
        consensus: non-empty indexable sequence of consensus symbols.
        time: passed through to ``JCModel``.
        backgroundProb: emission description shared by all insert states.
        trans: mapping of transition probabilities; reads the keys
            ``MM MI MD DD DM DI II IM ID _M _I _D _E DRM DRE DRI DRD
            MR_ MRE MRI IRI IR_ IRE``.

    Returns:
        The loaded, normalized and reordered GeneralizedHMM.

    Raises:
        ValueError: if ``consensus`` is None or empty.
    """
    if consensus is None or len(consensus) == 0:
        # Raising a bare string is itself a TypeError on Python >= 2.6, so a
        # real exception type is required for the message to surface.
        raise ValueError("Wrong consensus: {0}".format(consensus))

    def edge(src, dst, prob):
        # One transition record in the format expected by GeneralizedHMM.load.
        return {"from": src, "to": dst, "prob": prob}

    def emitting_state(name, emission):
        # Match/insert state: never a start state and never an end state.
        state = State(mathType)
        state.load({
            "__name__": "State",
            "name": name,
            "startprob": 0.0,
            "emission": emission,
            "endprob": 0.0,
        })
        return state

    def silent_state(name, startprob, endprob):
        # Zero-duration state that emits only the empty string.
        state = GeneralizedState(mathType)
        state.load({
            "__name__": "GeneralizedState",
            "name": name,
            "startprob": startprob,
            "emission": [("", 1.0)],
            "endprob": endprob,
            "durations": [(0, 1.0)],
        })
        return state

    length = len(consensus)
    states = []
    transitions = []
    for i, char in enumerate(consensus):
        cur = str(i)
        states.extend([
            emitting_state("m" + cur, JCModel(char, time, "ACGT")),
            emitting_state("i" + cur, backgroundProb),
            silent_state("1d" + cur, 0.0, 0.0),
            silent_state("2d" + cur, 0.0, 0.0),
        ])
        if i < length - 1:
            # Edges from position i to position i + 1.
            nxt = str(i + 1)
            transitions.extend([
                edge("m" + cur, "m" + nxt, trans['MM']),
                edge("m" + cur, "i" + nxt, trans['MI']),
                edge("m" + cur, "1d" + nxt, trans['MD']),
                edge("1d" + cur, "1d" + nxt, trans['DD']),
                edge("1d" + cur, "m" + nxt, trans['DM']),
                edge("1d" + cur, "i" + nxt, trans['DI']),
                edge("2d" + cur, "2d" + nxt, trans['DD']),
                edge("2d" + cur, "m" + nxt, trans['DM']),
                edge("2d" + cur, "i" + nxt, trans['DI']),
            ])
        # Edges leaving the insert state of position i (same position).
        transitions.extend([
            edge("i" + cur, "i" + cur, trans['II']),
            edge("i" + cur, "m" + cur, trans['IM']),
            edge("i" + cur, "1d" + cur, trans['ID']),
        ])

    last = str(length - 1)
    tail = str(length)
    transitions.extend([
        edge("Init", "m0", trans['_M']),
        edge("Init", "i0", trans['_I']),
        edge("Init", "1d0", trans['_D']),
        edge("Init", "End", trans['_E']),
        edge("1d" + last, "m0", trans['DRM']),
        edge("1d" + last, "End", trans['DRE']),
        edge("1d" + last, "i0", trans['DRI']),
        edge("1d" + last, "2d0", trans['DRD']),
        edge("m" + last, "Init", trans['MR_']),
        edge("m" + last, "End", trans['MRE']),
        edge("m" + last, "i" + tail, trans['MRI']),
        edge("i" + tail, "i" + tail, trans['IRI']),
        edge("i" + tail, "Init", trans['IR_']),
        edge("i" + tail, "End", trans['IRE']),
    ])

    states.append(emitting_state("i" + tail, backgroundProb))
    states.append(silent_state("Init", 1.0, 0.0))
    states.append(silent_state("End", 0.0, 1.0))

    # No outgoing edges are defined for the last state of the second delete
    # chain, so remove it together with every edge that refers to it.
    remstate = '2d' + last
    states = [state for state in states if state.stateName != remstate]
    transitions = [
        tran for tran in transitions
        if tran['to'] != remstate and tran['from'] != remstate
    ]

    hmm = GeneralizedHMM(mathType)
    hmm.load({
        "__name__": "GeneralizedHMM",
        "states": states,
        "transitions": transitions,
    })
    for state in hmm.states:
        state.normalizeTransitions()
    hmm.reorderStatesTopologically()
    return hmm
def build_model(consensus, modelParam):
    """Wrap the factory's HMM for ``consensus`` with a background state.

    The model returned by ``modelParam["modelFactory"].getHMM(consensus)`` is
    extended with

    * ``BackgroundState`` -- a State emitting
      ``model_factory.backgroundProbability`` with end probability 1.0, and
    * ``FinderInit`` -- a zero-duration GeneralizedState that becomes the
      only start state.

    Both new states reach the background state with probability
    ``1 - repProb`` and reach every original start state with that state's
    start probability scaled by ``repProb``; the original start
    probabilities are then zeroed.  Every original end state gets a
    transition to the background state weighted by its end probability.
    Finally the states are reordered topologically and the model's JSON form
    is written to ``submodels/<name>.js``.

    Args:
        consensus: consensus string; also used as the cache key and (when at
            most 20 characters long) as the dump file name.
        modelParam: mapping providing ``"mathType"`` and ``"modelFactory"``.

    Returns:
        The extended model, or the entry of the module-level ``model_cache``
        when ``consensus`` is already present there.
    """
    mathType = modelParam["mathType"]
    model_factory = modelParam["modelFactory"]
    # The cache is only read here; nothing in this function stores into it.
    if consensus in model_cache:
        return model_cache[consensus]
    model = model_factory.getHMM(consensus)
    # NOTE(review): the repeat probability is pinned to 0.01 and
    # model_factory.repProb is not used -- confirm whether the factory's
    # value should be honoured instead.
    repProb = 0.01

    # Remember the start/end states of the factory model before new states
    # are added.
    original_init_states = []
    original_end_states = []
    for index, state in enumerate(model.states):
        if state.startProbability > 0:
            original_init_states.append(index)
        if state.endProbability > 0:
            original_end_states.append(index)

    background_state = State(mathType)
    background_state.load({
        "__name__": "State",
        "name": "BackgroundState",
        "startprob": 0.0,
        "emission": model_factory.backgroundProbability,
        "endprob": 1.0,
    })
    background_state_id = model.addState(background_state)

    init_state = GeneralizedState(mathType)
    init_state.load({
        "__name__": "GeneralizedState",
        "name": "FinderInit",
        "startprob": 1.0,
        "emission": [("", 1.0)],
        "endprob": 0.0,
        "durations": [(0, 1.0)],
    })
    init_state_id = model.addState(init_state)

    model.addTransition(
        init_state_id, background_state_id, mathType(1.0) - repProb
    )
    model.addTransition(
        background_state_id, background_state_id, mathType(1.0) - repProb
    )
    for i in original_init_states:
        prob = model.states[i].startProbability * repProb
        model.addTransition(init_state_id, i, prob)
        model.addTransition(background_state_id, i, prob)
        model.states[i].startProbability = mathType(0.0)
    for i in original_end_states:
        model.addTransition(
            i, background_state_id, model.states[i].endProbability
        )
    model.reorderStatesTopologically()

    def LogNumToJson(obj):
        # json.dump fallback: LogNum is written as "<float value> <raw value>".
        if isinstance(obj, LogNum):
            return '{0} {1}'.format(str(float(obj)), str(obj.value))
        raise TypeError

    # Long consensus strings are replaced by their MD5 hex digest so the dump
    # file name stays short; md5 needs bytes, so text is encoded first.
    nm = consensus
    if len(nm) > 20:
        if isinstance(consensus, bytes):
            digest_input = consensus
        else:
            digest_input = consensus.encode("utf-8")
        nm = hashlib.md5(digest_input).hexdigest()
    with open('submodels/{0}.js'.format(nm), 'w') as f:
        json.dump(model.toJSON(), f, indent=4, sort_keys=True,
                  default=LogNumToJson)
    return model