Exemplo n.º 1
0
 def setUp(self):
     """Prepare the fixtures shared by the State and HMM tests.

     Sets ``self.inputY`` and ``self.Y`` to a single-state description and
     fills ``self.inputHMMData`` with one three-state HMM description
     (``Init``, ``White``, ``Black``) per entry of ``self.mathTypes``.
     """
     fixture = {
         "__name__": "State",
         "name": "name",
         "startprob": 1.0,
         "emission": [("A", 1.0), (["C", "D"], 0.5)],
         "endprob": 0.5
     }
     # The second emission key is turned from a list into a tuple in place.
     symbols, weight = fixture["emission"][1]
     fixture["emission"][1] = (tuple(symbols), weight)
     # NOTE(review): inputY and Y reference the same dict object, so the
     # tuple conversion above is visible through both names -- confirm that
     # this aliasing is intended.
     self.inputY = fixture
     self.Y = fixture
     self.inputHMMData = dict()

     for num_type in self.mathTypes:
         # Edge probabilities are wrapped in the numeric type under test.
         edges = [
             {"from": source, "to": target, "prob": num_type(prob)}
             for source, target, prob in (
                 ("Init", "White", 0.3),
                 ("Init", "Black", 0.7),
                 ("White", "White", 1.0),
                 ("Black", "Black", 1.0),
             )
         ]
         # (name, start probability, emission table); rebuilt on every pass
         # so that no emission list is shared between numeric types.
         specs = (
             ("Init", 1.0, [("0", 0.5), ("1", 0.5)]),
             ("White", 0.0, [("0", 0.05), ("1", 0.95)]),
             ("Black", 0.0, [("0", 0.9), ("1", 0.1)]),
         )
         loaded = {}
         for label, start, table in specs:
             node = State(num_type)
             node.load({
                 "__name__": "State",
                 "name": label,
                 "startprob": start,
                 "endprob": 1.0,
                 "emission": table
             })
             loaded[label] = node
         # The state list is stored in the order White, Init, Black.
         self.inputHMMData[num_type] = {
             "__name__": "HMM",
             "states": [loaded["White"], loaded["Init"], loaded["Black"]],
             "transitions": edges
         }
Exemplo n.º 2
0
 def test_state_loading(self):
     """Load the fixture into a State and compare its JSON dump to self.Y."""
     loaded = State()
     loaded.load(self.inputY)
     dumped = loaded.toJSON()
     expected = self.Y
     # The failure message is assembled up front from the same pieces.
     failure = "".join([
         "Loading and dumping to JSON does not ",
         " work: ",
         str(dumped),
         " != ",
         str(expected),
     ])
     self.assertDictEqual(dumped, expected, failure)
Exemplo n.º 3
0
 def test_state(self):
     """Exercise the State API for every numeric type in self.mathTypes.

     Per numeric type this checks the duration generator, one emission
     lookup, the state-ID accessors, forward/reverse transitions, ID
     remapping and clearing of transitions.  The start/end probability
     checks run once, after the loop.
     """
     for num_type in self.mathTypes:
         state = State(num_type)
         state.load(self.inputY)

         # Duration generator.
         durations = list(state.durationGenerator())
         expected = [(1, num_type(1.0))]
         self.assertEqual(
             durations, expected,
             "HMM.durationGenerator() does not work: {0} != {1}".format(
                 str(durations), str(expected)))

         # Emission lookup.
         expected = num_type(1.0)
         emitted = state.emission("AC", 0)
         self.assertAlmostEqual(
             emitted, expected, delta=1e-7,
             msg="HMM.emission(\"AC\", 0) does not work: {0} != {1}".format(
                 str(emitted), str(expected)))

         # State ID round trip.
         for state_id in range(4):
             state.setStateID(state_id)
             self.assertEqual(
                 state.getStateID(), state_id,
                 "HMM.set/getStateID({0}) is broken.".format(state_id))

         # Forward and reverse transitions, then remapping of the IDs.
         edges = [(1, 1.0), (2, 0.4), (3, 0.2), (4, 0.6)]
         id_map = {1: 2, 2: 3, 3: 4, 4: 5}
         for target, prob in edges:
             state.addTransition(target, prob)
             state.addReverseTransition(target, prob)
         self.assertEqual(state.followingIDs(), edges,
                          "HMM.?transitions are not working.")
         self.assertEqual(state.previousIDs(), edges,
                          "HMM.?reverse transitions are not working.")

         state.remapIDs(id_map)
         remapped = [(id_map[target], prob) for target, prob in edges]
         self.assertEqual(state.followingIDs(), remapped,
                          "HMM.remapIDs() is not working.")
         self.assertEqual(state.previousIDs(), remapped,
                          "HMM.remapIDs() is not working.")

         # After clearing, both directions together must be empty.
         state.clearTransitions()
         leftover = state.followingIDs()
         leftover.extend(state.previousIDs())
         self.assertEqual(leftover, [],
                          "HMM.clearTransitions() is not working.")

     # Start and end probability.
     # NOTE(review): these checks sit outside the loop, so they only cover
     # the state built for the last entry of self.mathTypes (and fail with
     # a NameError when self.mathTypes is empty) -- confirm whether they
     # were meant to run per numeric type.
     self.assertAlmostEqual(state.getStartProbability(), 1.0, delta=1e-7,
                            msg="HMM.getStartProbability is broken.")
     self.assertAlmostEqual(state.getEndProbability(), 0.5, delta=1e-7,
                            msg="HMM.getEndProbability is broken.")
Exemplo n.º 4
0
def createProfileHMMv1(mathType, consensus, time, backgroundProb, trans):
    """Build a profile HMM over ``consensus`` and return it.

    For every consensus position ``i`` four states are created: a match
    state ``m<i>`` (emission ``JCModel(char, time, "ACGT")``), an insert
    state ``i<i>`` (emission ``backgroundProb``) and two delete states
    ``1d<i>`` / ``2d<i>`` (empty-string emission, durations ``[(0, 1.0)]``).
    A trailing insert state ``i<len>`` and an ``Init`` state are appended.
    Edges from the last position lead back to ``Init`` / position 0, so
    the model can traverse the consensus repeatedly.

    Args:
        mathType: numeric type handed to every State, GeneralizedState and
            GeneralizedHMM constructor.
        consensus: non-empty sequence; each element is passed to JCModel.
        time: forwarded unchanged to JCModel.
        backgroundProb: emission table used for all insert states.
        trans: mapping providing the keys 'MM', 'MI', 'MD', 'DD', 'DM',
            'DI', 'II', 'IM', 'ID', '_M', '_I' and '_D'.

    Returns:
        The loaded GeneralizedHMM, after reorderStatesTopologically().

    Raises:
        ValueError: if ``consensus`` is None or empty; such a model would
            contain edges to non-existent states such as ``m-1``.
    """
    if consensus is None or len(consensus) == 0:
        raise ValueError("Wrong consensus: {}".format(consensus))

    def emitting(name, emission):
        # Description of a match/insert state.
        return {
            "__name__": "State",
            "name": name,
            "startprob": 0.0,
            "emission": emission,
            "endprob": 1.0
        }

    def non_emitting(name, startprob, endprob):
        # Description of a state that emits only the empty string.
        return {
            "__name__": "GeneralizedState",
            "name": name,
            "startprob": startprob,
            "emission": [("", 1.0)],
            "endprob": endprob,
            "durations": [(0, 1.0)]
        }

    def edge(src, dst, prob):
        # Description of a single transition.
        return {"from": src, "to": dst, "prob": prob}

    length = len(consensus)
    last = str(length - 1)
    tail = str(length)
    states = []
    transitions = []
    for i, char in enumerate(consensus):
        cur = str(i)
        matchState = State(mathType)
        insertState = State(mathType)
        deleteState1 = GeneralizedState(mathType)
        deleteState2 = GeneralizedState(mathType)
        matchState.load(emitting("m" + cur, JCModel(char, time, "ACGT")))
        insertState.load(emitting("i" + cur, backgroundProb))
        deleteState1.load(non_emitting("1d" + cur, 0.0, 0.0))
        deleteState2.load(non_emitting("2d" + cur, 0.0, 0.0))
        states.extend([matchState, insertState, deleteState1, deleteState2])
        if i < length - 1:
            # Edges from position i to position i + 1.
            nxt = str(i + 1)
            transitions.extend([
                edge("m" + cur, "m" + nxt, trans['MM']),
                edge("m" + cur, "i" + nxt, trans['MI']),
                edge("m" + cur, "1d" + nxt, trans['MD']),
                edge("1d" + cur, "1d" + nxt, trans['DD']),
                edge("1d" + cur, "m" + nxt, trans['DM']),
                edge("1d" + cur, "i" + nxt, trans['DI']),
                edge("2d" + cur, "2d" + nxt, trans['DD']),
                edge("2d" + cur, "m" + nxt, trans['DM']),
                edge("2d" + cur, "i" + nxt, trans['DI']),
            ])
        # Edges leaving the insert state of position i.
        transitions.extend([
            edge("i" + cur, "i" + cur, trans['II']),
            edge("i" + cur, "m" + cur, trans['IM']),
            edge("i" + cur, "1d" + cur, trans['ID']),
        ])
    # Entry edges from Init and the edges leaving the last position.
    transitions.extend([
        edge("Init", "m0", trans['_M']),
        edge("Init", "i0", trans['_I']),
        edge("Init", "1d0", trans['_D']),
        edge("1d" + last, "m0", trans['_M']),
        edge("1d" + last, "i0", trans['_I']),
        edge("1d" + last, "2d0", trans['_D']),
        edge("m" + last, "Init", 1.0 - trans['MI']),
        edge("m" + last, "i" + tail, trans['MI']),
        edge("i" + tail, "i" + tail, trans['II']),
        edge("i" + tail, "Init", 1.0 - trans['II']),
        # The last 2d state has no delete edge; '_M' and '_I' are rescaled
        # so that its two outgoing edges sum to one.
        edge("2d" + last, "m0", trans['_M'] / (trans['_M'] + trans['_I'])),
        edge("2d" + last, "i0", trans['_I'] / (trans['_M'] + trans['_I'])),
    ])
    insertState = State(mathType)
    insertState.load(emitting("i" + tail, backgroundProb))
    states.append(insertState)
    initState = GeneralizedState(mathType)
    initState.load(non_emitting("Init", 1.0, 1.0))
    states.append(initState)
    hmm = GeneralizedHMM(mathType)
    hmm.load({
        "__name__": "GeneralizedHMM",
        "states": states,
        "transitions": transitions,
    })
    hmm.reorderStatesTopologically()
    return hmm
Exemplo n.º 5
0
def createProfileHMMv2(mathType, consensus, time, backgroundProb, trans):
    """Build a profile HMM over ``consensus`` with an explicit End state.

    For every consensus position ``i`` four states are created: a match
    state ``m<i>`` (emission ``JCModel(char, time, "ACGT")``), an insert
    state ``i<i>`` (emission ``backgroundProb``) and two delete states
    ``1d<i>`` / ``2d<i>`` (empty-string emission, durations ``[(0, 1.0)]``).
    A trailing insert state ``i<len>``, an ``Init`` state and an ``End``
    state are appended; ``End`` is the only state with a non-zero end
    probability.  The state ``2d<len-1>`` and every edge touching it are
    removed before loading, and the transitions of every loaded state are
    normalized.

    Args:
        mathType: numeric type handed to every State, GeneralizedState and
            GeneralizedHMM constructor.
        consensus: non-empty sequence; each element is passed to JCModel.
        time: forwarded unchanged to JCModel.
        backgroundProb: emission table used for all insert states.
        trans: mapping providing the keys 'MM', 'MI', 'MD', 'DD', 'DM',
            'DI', 'II', 'IM', 'ID', '_M', '_I', '_D', '_E', 'DRM', 'DRE',
            'DRI', 'DRD', 'MR_', 'MRE', 'MRI', 'IRI', 'IR_' and 'IRE'.

    Returns:
        The loaded GeneralizedHMM, after normalizeTransitions() on every
        state and reorderStatesTopologically().

    Raises:
        ValueError: if ``consensus`` is None or empty.
    """
    if consensus is None or len(consensus) == 0:
        # Raising a plain string is itself a TypeError; use a real exception.
        raise ValueError("Wrong consensus: {}".format(consensus))

    def emitting(name, emission):
        # Description of a match/insert state.
        return {
            "__name__": "State",
            "name": name,
            "startprob": 0.0,
            "emission": emission,
            "endprob": 0.0
        }

    def non_emitting(name, startprob, endprob):
        # Description of a state that emits only the empty string.
        return {
            "__name__": "GeneralizedState",
            "name": name,
            "startprob": startprob,
            "emission": [("", 1.0)],
            "endprob": endprob,
            "durations": [(0, 1.0)]
        }

    def edge(src, dst, prob):
        # Description of a single transition.
        return {"from": src, "to": dst, "prob": prob}

    length = len(consensus)
    last = str(length - 1)
    tail = str(length)
    states = []
    transitions = []
    for i, char in enumerate(consensus):
        cur = str(i)
        matchState = State(mathType)
        insertState = State(mathType)
        deleteState1 = GeneralizedState(mathType)
        deleteState2 = GeneralizedState(mathType)
        matchState.load(emitting("m" + cur, JCModel(char, time, "ACGT")))
        insertState.load(emitting("i" + cur, backgroundProb))
        deleteState1.load(non_emitting("1d" + cur, 0.0, 0.0))
        deleteState2.load(non_emitting("2d" + cur, 0.0, 0.0))
        states.extend([matchState, insertState, deleteState1, deleteState2])
        if i < length - 1:
            # Edges from position i to position i + 1.
            nxt = str(i + 1)
            transitions.extend([
                edge("m" + cur, "m" + nxt, trans['MM']),
                edge("m" + cur, "i" + nxt, trans['MI']),
                edge("m" + cur, "1d" + nxt, trans['MD']),
                edge("1d" + cur, "1d" + nxt, trans['DD']),
                edge("1d" + cur, "m" + nxt, trans['DM']),
                edge("1d" + cur, "i" + nxt, trans['DI']),
                edge("2d" + cur, "2d" + nxt, trans['DD']),
                edge("2d" + cur, "m" + nxt, trans['DM']),
                edge("2d" + cur, "i" + nxt, trans['DI']),
            ])
        # Edges leaving the insert state of position i.
        transitions.extend([
            edge("i" + cur, "i" + cur, trans['II']),
            edge("i" + cur, "m" + cur, trans['IM']),
            edge("i" + cur, "1d" + cur, trans['ID']),
        ])
    # Entry edges from Init and the edges leaving the last position.
    transitions.extend([
        edge("Init", "m0", trans['_M']),
        edge("Init", "i0", trans['_I']),
        edge("Init", "1d0", trans['_D']),
        edge("Init", "End", trans['_E']),
        edge("1d" + last, "m0", trans['DRM']),
        edge("1d" + last, "End", trans['DRE']),
        edge("1d" + last, "i0", trans['DRI']),
        edge("1d" + last, "2d0", trans['DRD']),
        edge("m" + last, "Init", trans['MR_']),
        edge("m" + last, "End", trans['MRE']),
        edge("m" + last, "i" + tail, trans['MRI']),
        edge("i" + tail, "i" + tail, trans['IRI']),
        edge("i" + tail, "Init", trans['IR_']),
        edge("i" + tail, "End", trans['IRE']),
    ])
    insertState = State(mathType)
    insertState.load(emitting("i" + tail, backgroundProb))
    states.append(insertState)
    initState = GeneralizedState(mathType)
    initState.load(non_emitting("Init", 1.0, 0.0))
    states.append(initState)
    endState = GeneralizedState(mathType)
    endState.load(non_emitting("End", 0.0, 1.0))
    states.append(endState)
    # No edge leaves 2d<last> in this topology and its end probability is
    # 0.0, so the state is dropped together with every edge touching it.
    remstate = '2d' + last
    states = [state for state in states if state.stateName != remstate]
    transitions = [
        tran for tran in transitions
        if tran['to'] != remstate and tran['from'] != remstate
    ]
    hmm = GeneralizedHMM(mathType)
    hmm.load({
        "__name__": "GeneralizedHMM",
        "states": states,
        "transitions": transitions,
    })
    for state in hmm.states:
        state.normalizeTransitions()
    hmm.reorderStatesTopologically()
    return hmm
Exemplo n.º 6
0
def build_model(consensus, modelParam):
    """Wrap the factory HMM for ``consensus`` and dump the result to disk.

    The HMM returned by ``modelParam["modelFactory"].getHMM(consensus)`` is
    extended with a ``BackgroundState`` and a ``FinderInit`` state.  States
    that had a positive start probability become reachable from both new
    states (and get their start probability reset to zero); states with a
    positive end probability get an edge back to the background state.
    The finished model is written as JSON to ``submodels/<name>.js``.

    Args:
        consensus: key of the model; passed to the factory and used for
            the cache lookup and the dump file name.
        modelParam: mapping with the keys "mathType" (numeric type) and
            "modelFactory" (object providing getHMM() and
            backgroundProbability).

    Returns:
        The cached model when ``consensus`` is in ``model_cache``,
        otherwise the newly built model.
    """
    mathType = modelParam["mathType"]
    model_factory = modelParam["modelFactory"]
    # NOTE(review): nothing in this function stores into model_cache (the
    # store was commented out), so this only hits for entries added
    # elsewhere.
    if consensus in model_cache:
        return model_cache[consensus]
    model = model_factory.getHMM(consensus)
    # NOTE(review): the repeat probability is hard-coded; the previous code
    # read model_factory.repProb and immediately overwrote it with 0.01.
    # Confirm which value is intended.
    repProb = 0.01
    # Remember the entry/exit states before the new states are added.
    original_init_states = []
    original_end_states = []
    for index, state in enumerate(model.states):
        if state.startProbability > 0:
            original_init_states.append(index)
        if state.endProbability > 0:
            original_end_states.append(index)
    background_state = State(mathType)
    background_state.load({
        "__name__": "State",
        "name": "BackgroundState",
        "startprob": 0.0,
        "emission": model_factory.backgroundProbability,
        "endprob": 1.0,
    })
    background_state_id = model.addState(background_state)
    init_state = GeneralizedState(mathType)
    init_state.load({
        "__name__": "GeneralizedState",
        "name": "FinderInit",
        "startprob": 1.0,
        "emission": [("", 1.0)],
        "endprob": 0.0,
        "durations": [(0, 1.0)]
    })
    init_state_id = model.addState(init_state)
    model.addTransition(
        init_state_id,
        background_state_id,
        mathType(1.0) - repProb
    )
    model.addTransition(
        background_state_id,
        background_state_id,
        mathType(1.0) - repProb
    )
    for index in original_init_states:
        prob = model.states[index].startProbability * repProb
        model.addTransition(init_state_id, index, prob)
        model.addTransition(background_state_id, index, prob)
        # FinderInit is now the only start state.
        model.states[index].startProbability = mathType(0.0)
    for index in original_end_states:
        model.addTransition(
            index,
            background_state_id,
            model.states[index].endProbability
        )
    model.reorderStatesTopologically()

    # Long consensus strings are replaced by their MD5 digest so the dump
    # file name stays short.  md5() needs bytes, so text is encoded first.
    file_stem = consensus
    if len(file_stem) > 20:
        if isinstance(consensus, bytes):
            raw = consensus
        else:
            raw = consensus.encode("utf-8")
        file_stem = hashlib.md5(raw).hexdigest()

    def LogNumToJson(obj):
        # json.dump fallback: LogNum is written as "<float> <raw value>".
        if isinstance(obj, LogNum):
            return '{0} {1}'.format(str(float(obj)), str(obj.value))
        raise TypeError

    with open('submodels/{0}.js'.format(file_stem), 'w') as f:
        json.dump(model.toJSON(), f, indent=4, sort_keys=True,
                  default=LogNumToJson)
    return model