Пример #1
0
 def __init__(self, *p):
     """Initialise the base State and reset every attribute to its default.

     All positional arguments are forwarded unchanged to ``State.__init__``.
     """
     State.__init__(self, *p)
     # Collaborators: unset until something assigns them.
     self.hmm = self.factory = None
     self.repeatGeneratorX = self.repeatGeneratorY = None
     self.consensus = ""
     # Caches: memoizeX/Y default to nested dicts, the *simple ones are flat.
     self.memoizeX, self.memoizeY = defaultdict(dict), defaultdict(dict)
     self.memoizeXsimple, self.memoizeYsimple = {}, {}
     self.consensusSampler = self.durationSampler = None
     # Model parameters, all unset at construction time.
     self.backgroundProbability = None
     self.time = None
     self.transitionMatrix = None
     self.consensusDistribution = None
     self.repeatLengthDistribution = None
     self.repProb = None
     self.modelversion = None
     self.trackEmissions = None
     # Counters and consensus collections start empty.
     self.x_count = self.y_count = 0
     self.cons_set, self.cons_list = set(), []
Пример #2
0
 def load(self, dictionary):
     """Load the base State fields plus the "durations" entry.

     ``self.durations`` becomes a list holding each item of
     ``dictionary["durations"]`` converted to a tuple.

     Raises:
         ParseException: if ``dictionary`` has no "durations" key.
     """
     State.load(self, dictionary)
     if "durations" not in dictionary:
         raise ParseException("durations were not found in GeneralizedState")
     self.durations = [tuple(entry) for entry in dictionary["durations"]]
Пример #3
0
 def test_state_loading(self):
     """Load the fixture into a State and check toJSON() reproduces it."""
     state = State()
     state.load(self.inputY)
     dumped, expected = state.toJSON(), self.Y
     message = ("Loading and dumping to JSON does not  work: "
                + str(dumped) + " != " + str(expected))
     self.assertDictEqual(dumped, expected, message)
Пример #4
0
 def load(self, dictionary):
     """Populate the state from ``dictionary``, including its durations.

     After ``State.load`` has run, every item of ``dictionary["durations"]``
     is turned into a tuple and the resulting list is stored on
     ``self.durations``.

     Raises:
         ParseException: if the "durations" key is missing.
     """
     State.load(self, dictionary)
     if "durations" not in dictionary:
         raise ParseException(
             "durations were not found in GeneralizedState")
     self.durations = list(map(tuple, dictionary["durations"]))
Пример #5
0
 def load(self, dictionary):
     """Load the state from ``dictionary`` and build its profile factory.

     Required keys: 'backgroundprob', 'time', 'transitionmatrix'.
     Optional keys: 'consensusdistribution', 'repeatlengthdistribution',
     'trackemissions', 'version' (defaults to 'v1') and 'repprob'.

     Raises:
         ParseException: if one of the required keys is missing.
     """
     State.load(self, dictionary)

     def unit_probability(*_):
         # Default factory for the fallback distributions below.
         return self.mathType(1.0)

     if 'backgroundprob' not in dictionary:
         raise ParseException("Backround probability was not found in state")
     self.backgroundProbability = list(
         map(tuple, dictionary['backgroundprob']))

     if 'time' not in dictionary:
         raise ParseException('Time was not found in state')
     self.time = dictionary['time']

     if 'transitionmatrix' not in dictionary:
         raise ParseException('Transition matrix not found in state')
     self.transitionMatrix = dictionary['transitionmatrix']

     if 'consensusdistribution' not in dictionary:
         self.consensusDistribution = defaultdict(unit_probability)
     else:
         normalized = normalize_dict(
             dictionary['consensusdistribution'],
             mathType=self.mathType
         )
         self.consensusDistribution = default_dist(normalized)

     if 'repeatlengthdistribution' in dictionary:
         if type(dictionary['repeatlengthdistribution']) in (dict,
                                                              defaultdict):
             normalized = normalize_dict(
                 dictionary['repeatlengthdistribution'],
                 mathType=self.mathType
             )
             self.repeatLengthDistribution = default_dist(normalized)
         else:
             # Anything that is not a plain dict/defaultdict is stored as is
             # and must expose a ``p`` attribute.
             self.repeatLengthDistribution = \
                 dictionary['repeatlengthdistribution']
             self.repProb = self.repeatLengthDistribution.p

     if 'trackemissions' in dictionary:
         self.trackEmissions = dictionary['trackemissions']

     self.version = dictionary['version'] if 'version' in dictionary else 'v1'

     # An explicit 'repprob' overrides the value taken from the distribution.
     if 'repprob' in dictionary:
         self.repProb = self.mathType(dictionary['repprob'])

     if self.version == 'v2':
         # v2 replaces whatever was loaded above for these two attributes.
         self.trackEmissions = defaultdict(unit_probability)
         self.trackEmissions['MM'] = self.mathType(1.0)
         self.repeatLengthDistribution = defaultdict(unit_probability)
         self.repeatLengthDistribution[10] = self.mathType(1.0)

     self.factory = factory = RepeatProfileFactory(
         self.mathType, self.version, self.repProb)
     # NOTE(review): the attribute name is spelled 'backgroudProbability';
     # confirm that this is the spelling RepeatProfileFactory reads.
     factory.backgroudProbability = self.backgroundProbability
     factory.time = self.time
     factory.transitionMatrix = self.transitionMatrix
Пример #6
0
 def toJSON(self):
     """Serialise the state on top of the dict returned by State.toJSON.

     Always adds 'backgroundprob', 'time', 'transitionmatrix' and
     'trackemissions'. 'consensusdistribution', 'repeatlengthdistribution'
     (both passed through ``dist_to_json``), 'version' and 'repprob'
     (converted with ``float``) are added only when the corresponding
     attribute is not None.

     Returns:
         The extended dict.
     """
     ret = State.toJSON(self)
     ret['backgroundprob'] = self.backgroundProbability
     ret['time'] = self.time
     ret['transitionmatrix'] = self.transitionMatrix
     # Identity tests are used on purpose: "!= None" would dispatch to the
     # value's own comparison methods, which need not accept None.
     if self.consensusDistribution is not None:
         ret['consensusdistribution'] = \
             dist_to_json(self.consensusDistribution)
     if self.repeatLengthDistribution is not None:
         ret['repeatlengthdistribution'] = \
             dist_to_json(self.repeatLengthDistribution)
     ret['trackemissions'] = self.trackEmissions
     if self.version is not None:
         ret['version'] = self.version
     if self.repProb is not None:
         ret['repprob'] = float(self.repProb)
     # TODO: save consensus distribution
     return ret
Пример #7
0
 def toJSON(self):
     """Return the State.toJSON dict extended with a "durations" entry."""
     serialized = State.toJSON(self)
     serialized["durations"] = self.durations
     return serialized
Пример #8
0
 def setUp(self):
     """Create the single-state fixture and one HMM description per type.

     ``self.inputY`` and ``self.Y`` refer to the same dict. For every entry
     of ``self.mathTypes``, ``self.inputHMMData`` receives a dict with the
     loaded states White, Init and Black and four transitions.
     """
     fixture = {
         "__name__": "State",
         "name": "name",
         "startprob": 1.0,
         "emission": [("A", 1.0), (["C", "D"], 0.5)],
         "endprob": 0.5
     }
     # Swap the list of symbols in the second emission for a tuple.
     symbols, probability = fixture["emission"][1]
     fixture["emission"][1] = (tuple(symbols), probability)
     self.inputY = fixture
     self.Y = fixture
     self.inputHMMData = dict()

     def make_state(mathType, name, startprob, emission):
         # Build a State of the given math type and load one description.
         state = State(mathType)
         state.load({
             "__name__": "State",
             "name": name,
             "startprob": startprob,
             "endprob": 1.0,
             "emission": emission
         })
         return state

     for mathType in self.mathTypes:
         transitions = [
             {"from": source, "to": target, "prob": mathType(prob)}
             for source, target, prob in (
                 ("Init", "White", 0.3),
                 ("Init", "Black", 0.7),
                 ("White", "White", 1.0),
                 ("Black", "Black", 1.0),
             )
         ]
         init = make_state(mathType, "Init", 1.0,
                           [("0", 0.5), ("1", 0.5)])
         white = make_state(mathType, "White", 0.0,
                            [("0", 0.05), ("1", 0.95)])
         black = make_state(mathType, "Black", 0.0,
                            [("0", 0.9), ("1", 0.1)])
         self.inputHMMData[mathType] = {
             "__name__": "HMM",
             "states": [white, init, black],
             "transitions": transitions
         }
Пример #9
0
 def load(self, dictionary):
     """Load the base State fields and the mandatory 'order' entry.

     Raises:
         ParseException: if ``dictionary`` has no 'order' key.
     """
     State.load(self, dictionary)
     if 'order' in dictionary:
         self.order = dictionary['order']
     else:
         raise ParseException('order was not found in state')
Пример #10
0
 def __init__(self, *p):
     """Forward ``p`` to State.__init__ and start with ``order`` set to 0."""
     State.__init__(self, *p)
     # Default order.
     self.order = 0
Пример #11
0
 def toJSON(self):
     """Return the State.toJSON dict extended with an 'order' entry."""
     serialized = State.toJSON(self)
     serialized['order'] = self.order
     return serialized
Пример #12
0
def createProfileHMMv1(mathType, consensus, time, backgroundProb, trans):
    """Build the version-1 profile HMM for one consensus sequence.

    For every consensus position ``i`` the model receives a match state
    ``m<i>`` (emission ``JCModel(char, time, "ACGT")``), an insert state
    ``i<i>`` (emission ``backgroundProb``) and two delete states ``1d<i>``
    and ``2d<i>`` with durations ``[(0, 1.0)]``. A trailing insert state
    ``i<length>`` and an ``Init`` state are appended afterwards.

    Args:
        mathType: numeric type handed to every state and to the HMM.
        consensus: non-empty sequence of consensus symbols.
        time: forwarded to ``JCModel`` for the match emissions.
        backgroundProb: emission table used by all insert states.
        trans: mapping of transition probabilities; the keys read are
            MM, MI, MD, DD, DM, DI, II, IM, ID, _M, _I and _D.

    Returns:
        The loaded ``GeneralizedHMM`` after
        ``reorderStatesTopologically()``.

    Raises:
        ValueError: if ``consensus`` is empty.
    """
    length = len(consensus)
    if length == 0:
        # With no positions the closing transitions below would name states
        # such as "m0" and "1d-1" that are never created.
        raise ValueError("Wrong consensus: {0}".format(consensus))

    def edge(source, target, prob):
        # One transition record in the format expected by hmm.load().
        return {"from": source, "to": target, "prob": prob}

    def emitting_state(name, emission):
        # Plain State that never starts the model and may always end it.
        state = State(mathType)
        state.load({
            "__name__": "State",
            "name": name,
            "startprob": 0.0,
            "emission": emission,
            "endprob": 1.0
        })
        return state

    def zero_duration_state(name, startprob, endprob):
        # GeneralizedState emitting "" with durations [(0, 1.0)].
        state = GeneralizedState(mathType)
        state.load({
            "__name__": "GeneralizedState",
            "name": name,
            "startprob": startprob,
            "emission": [("", 1.0)],
            "endprob": endprob,
            "durations": [(0, 1.0)]
        })
        return state

    states = []
    transitions = []
    for i, char in enumerate(consensus):
        match, insert = "m" + str(i), "i" + str(i)
        delete1, delete2 = "1d" + str(i), "2d" + str(i)
        states.extend([
            emitting_state(match, JCModel(char, time, "ACGT")),
            emitting_state(insert, backgroundProb),
            zero_duration_state(delete1, 0.0, 0.0),
            zero_duration_state(delete2, 0.0, 0.0),
        ])
        if i < length - 1:
            nxt = str(i + 1)
            transitions.extend([
                edge(match, "m" + nxt, trans['MM']),
                edge(match, "i" + nxt, trans['MI']),
                edge(match, "1d" + nxt, trans['MD']),
                edge(delete1, "1d" + nxt, trans['DD']),
                edge(delete1, "m" + nxt, trans['DM']),
                edge(delete1, "i" + nxt, trans['DI']),
                edge(delete2, "2d" + nxt, trans['DD']),
                edge(delete2, "m" + nxt, trans['DM']),
                edge(delete2, "i" + nxt, trans['DI']),
            ])
        transitions.extend([
            edge(insert, insert, trans['II']),
            edge(insert, match, trans['IM']),
            edge(insert, delete1, trans['ID']),
        ])

    last = str(length - 1)
    tail_insert = "i" + str(length)
    transitions.extend([
        edge("Init", "m0", trans['_M']),
        edge("Init", "i0", trans['_I']),
        edge("Init", "1d0", trans['_D']),
        edge("1d" + last, "m0", trans['_M']),
        edge("1d" + last, "i0", trans['_I']),
        edge("1d" + last, "2d0", trans['_D']),
        edge("m" + last, "Init", 1.0 - trans['MI']),
        edge("m" + last, tail_insert, trans['MI']),
        edge(tail_insert, tail_insert, trans['II']),
        edge(tail_insert, "Init", 1.0 - trans['II']),
        # The last 2d state has no delete successor, so the two remaining
        # probabilities are rescaled by their sum.
        edge("2d" + last, "m0",
             trans['_M'] / (trans['_M'] + trans['_I'])),
        edge("2d" + last, "i0",
             trans['_I'] / (trans['_M'] + trans['_I'])),
    ])

    states.append(emitting_state(tail_insert, backgroundProb))
    states.append(zero_duration_state("Init", 1.0, 1.0))

    hmm = GeneralizedHMM(mathType)
    hmm.load({
        "__name__": "GeneralizedHMM",
        "states": states,
        "transitions": transitions,
    })
    hmm.reorderStatesTopologically()
    return hmm
Пример #13
0
 def computeHints(self, realigner):
     """Run the base computeHints, then both precompute steps.

     ``realigner`` is passed unchanged to every call.
     """
     State.computeHints(self, realigner)
     for precompute in (self.precomputeRepeatGenerators,
                        self.precomputeEmissionCache):
         precompute(realigner)
Пример #14
0
 def test_state(self):
     """Exercise the State API once for every type in ``self.mathTypes``."""
     for numType in self.mathTypes:
         state = State(numType)
         state.load(self.inputY)

         # Duration generator.
         actual = list(state.durationGenerator())
         expected = [(1, numType(1.0))]
         self.assertEqual(
             actual, expected,
             "HMM.durationGenerator() does not work: "
             + str(actual) + " != " + str(expected))

         # Emission.
         expected = numType(1.0)
         actual = state.emission("AC", 0)
         self.assertAlmostEqual(
             actual, expected, delta=1e-7,
             msg="HMM.emission(\"AC\", 0) does not work: "
                 + str(actual) + " != " + str(expected))

         # State ID round trip.
         for state_id in range(4):
             state.setStateID(state_id)
             self.assertEqual(
                 state.getStateID(), state_id,
                 "HMM.set/getStateID({0}) is broken.".format(state_id))

         # Transitions in both directions, then remapping of the IDs.
         transitions = [(1, 1.0), (2, 0.4), (3, 0.2), (4, 0.6)]
         id_map = {1: 2, 2: 3, 3: 4, 4: 5}
         for target, prob in transitions:
             state.addTransition(target, prob)
             state.addReverseTransition(target, prob)
         self.assertEqual(state.followingIDs(), transitions,
                          "HMM.?transitions are not working.")
         self.assertEqual(state.previousIDs(), transitions,
                          "HMM.?reverse transitions are not working.")

         state.remapIDs(id_map)
         transitions = [(id_map[old], prob) for old, prob in transitions]
         self.assertEqual(state.followingIDs(), transitions,
                          "HMM.remapIDs() is not working.")
         self.assertEqual(state.previousIDs(), transitions,
                          "HMM.remapIDs() is not working.")

         # Clearing must leave both directions empty.
         state.clearTransitions()
         remaining = state.followingIDs()
         remaining.extend(state.previousIDs())
         self.assertEqual(remaining, [],
                          "HMM.clearTransitions() is not working.")

     # NOTE(review): the checks below sit outside the loop, so they only see
     # the state built for the last math type -- confirm this is intended.
     self.assertAlmostEqual(state.getStartProbability(), 1.0, delta=1e-7,
                            msg="HMM.getStartProbability is broken.")
     self.assertAlmostEqual(state.getEndProbability(), 0.5, delta=1e-7,
                            msg="HMM.getEndProbability is broken.")
Пример #15
0
 def toJSON(self):
     """Serialise via State.toJSON and add this state's 'order'."""
     result = State.toJSON(self)
     result['order'] = self.order
     return result
Пример #16
0
 def __init__(self, *p):
     """Forward ``p`` to State.__init__ and start with no durations."""
     State.__init__(self, *p)
     # Empty until load() assigns the real list.
     self.durations = []
Пример #17
0
 def load(self, dictionary):
     """Populate the state from ``dictionary``; 'order' is mandatory.

     Raises:
         ParseException: if the 'order' key is missing.
     """
     State.load(self, dictionary)
     has_order = 'order' in dictionary
     if not has_order:
         raise ParseException('order was not found in state')
     self.order = dictionary['order']
Пример #18
0
 def __init__(self, *p):
     """Initialise the base State with ``p``; ``order`` defaults to 0."""
     State.__init__(self, *p)
     # Initial value of the order attribute.
     self.order = 0
Пример #19
0
 def __init__(self, *p):
     """Initialise the base State with ``p`` and an empty durations list."""
     State.__init__(self, *p)
     # No durations known yet.
     self.durations = []
Пример #20
0
def build_model(consensus, modelParam):
    """Wrap the factory's HMM for ``consensus`` with background states.

    The HMM returned by ``modelFactory.getHMM(consensus)`` is extended with
    a "BackgroundState" and a "FinderInit" state. Both new states lead into
    the background state and into every original start state. The original
    start probabilities are reset to ``mathType(0.0)`` and every original
    end state gets a transition to the background state. The model is then
    reordered topologically and dumped as JSON to ``submodels/<name>.js``.

    Args:
        consensus: consensus sequence; also used to derive the file name.
        modelParam: mapping providing "mathType" and "modelFactory".

    Returns:
        The extended model, or the entry of ``model_cache`` for
        ``consensus`` when one exists.
    """
    global model_cache
    mathType = modelParam["mathType"]
    model_factory = modelParam["modelFactory"]
    # The write to model_cache further down is disabled, so this only hits
    # when something else has filled the cache.
    if consensus in model_cache:
        return model_cache[consensus]
    model = model_factory.getHMM(consensus)
    # NOTE(review): the original read model_factory.repProb and immediately
    # overwrote it with this constant; confirm which value is intended.
    repProb = 0.01
    original_init_states = []
    original_end_states = []
    for index, state in enumerate(model.states):
        if state.startProbability > 0:
            original_init_states.append(index)
        if state.endProbability > 0:
            original_end_states.append(index)
    background_state = State(mathType)
    background_state.load({
        "__name__": "State",
        "name": "BackgroundState",
        "startprob": 0.0,
        "emission": model_factory.backgroundProbability,
        "endprob": 1.0,
    })
    background_state_id = model.addState(background_state)
    init_state = GeneralizedState(mathType)
    init_state.load({
        "__name__": "GeneralizedState",
        "name": "FinderInit",
        "startprob": 1.0,
        "emission": [("", 1.0)],
        "endprob": 0.0,
        "durations": [(0, 1.0)]
    })
    init_state_id = model.addState(init_state)
    model.addTransition(
        init_state_id,
        background_state_id,
        mathType(1.0) - repProb
    )
    model.addTransition(
        background_state_id,
        background_state_id,
        mathType(1.0) - repProb
    )
    for i in original_init_states:
        prob = model.states[i].startProbability * repProb
        model.addTransition(init_state_id, i, prob)
        model.addTransition(background_state_id, i, prob)
        # Only FinderInit may start the combined model.
        model.states[i].startProbability = mathType(0.0)
    for i in original_end_states:
        model.addTransition(
            i,
            background_state_id,
            model.states[i].endProbability
        )
    model.reorderStatesTopologically()
    #model_cache[consensus] = model

    # Long consensus strings are replaced by their md5 digest in the file
    # name. hashlib needs bytes, so text is encoded first.
    nm = consensus
    if len(nm) > 20:
        raw = consensus
        if not isinstance(raw, bytes):
            raw = raw.encode("utf-8")
        nm = hashlib.md5(raw).hexdigest()

    def LogNumToJson(obj):
        # json.dump fallback: LogNum becomes "<float value> <raw value>".
        if isinstance(obj, LogNum):
            return '{0} {1}'.format(str(float(obj)), str(obj.value))
        raise TypeError

    # The original computed nm but opened the file under the raw consensus.
    with open('submodels/{0}.js'.format(nm), 'w') as f:
        json.dump(model.toJSON(), f, indent=4, sort_keys=True,
                  default=LogNumToJson)
    return model
Пример #21
0
 def toJSON(self):
     """Serialise via State.toJSON and add this state's "durations"."""
     result = State.toJSON(self)
     result["durations"] = self.durations
     return result
Пример #22
0
def createProfileHMMv2(mathType, consensus, time, backgroundProb, trans):
    """Build the version-2 profile HMM for one consensus sequence.

    For every consensus position ``i`` the model receives a match state
    ``m<i>`` (emission ``JCModel(char, time, "ACGT")``), an insert state
    ``i<i>`` (emission ``backgroundProb``) and two delete states ``1d<i>``
    and ``2d<i>`` with durations ``[(0, 1.0)]``. A trailing insert state
    ``i<length>``, an ``Init`` state and an ``End`` state are appended. The
    state ``2d<length-1>`` and every transition touching it are dropped
    again before the HMM is loaded. After loading, each state's transitions
    are normalized and the states are reordered topologically.

    Args:
        mathType: numeric type handed to every state and to the HMM.
        consensus: non-empty sequence of consensus symbols.
        time: forwarded to ``JCModel`` for the match emissions.
        backgroundProb: emission table used by all insert states.
        trans: mapping of transition probabilities; the keys read are
            MM, MI, MD, DD, DM, DI, II, IM, ID, _M, _I, _D, _E, DRM, DRE,
            DRI, DRD, MR_, MRE, MRI, IRI, IR_ and IRE.

    Returns:
        The loaded and normalized ``GeneralizedHMM``.

    Raises:
        ValueError: if ``consensus`` is None or empty.
    """
    if consensus is None or len(consensus) == 0:
        # The original raised a bare string, which is not a valid exception.
        raise ValueError("Wrong consensus: {}".format(consensus))
    length = len(consensus)

    def edge(source, target, prob):
        # One transition record in the format expected by hmm.load().
        return {"from": source, "to": target, "prob": prob}

    def emitting_state(name, emission):
        # Plain State that neither starts nor ends the model.
        state = State(mathType)
        state.load({
            "__name__": "State",
            "name": name,
            "startprob": 0.0,
            "emission": emission,
            "endprob": 0.0
        })
        return state

    def zero_duration_state(name, startprob, endprob):
        # GeneralizedState emitting "" with durations [(0, 1.0)].
        state = GeneralizedState(mathType)
        state.load({
            "__name__": "GeneralizedState",
            "name": name,
            "startprob": startprob,
            "emission": [("", 1.0)],
            "endprob": endprob,
            "durations": [(0, 1.0)]
        })
        return state

    states = []
    transitions = []
    for i, char in enumerate(consensus):
        match, insert = "m" + str(i), "i" + str(i)
        delete1, delete2 = "1d" + str(i), "2d" + str(i)
        states.extend([
            emitting_state(match, JCModel(char, time, "ACGT")),
            emitting_state(insert, backgroundProb),
            zero_duration_state(delete1, 0.0, 0.0),
            zero_duration_state(delete2, 0.0, 0.0),
        ])
        if i < length - 1:
            nxt = str(i + 1)
            transitions.extend([
                edge(match, "m" + nxt, trans['MM']),
                edge(match, "i" + nxt, trans['MI']),
                edge(match, "1d" + nxt, trans['MD']),
                edge(delete1, "1d" + nxt, trans['DD']),
                edge(delete1, "m" + nxt, trans['DM']),
                edge(delete1, "i" + nxt, trans['DI']),
                edge(delete2, "2d" + nxt, trans['DD']),
                edge(delete2, "m" + nxt, trans['DM']),
                edge(delete2, "i" + nxt, trans['DI']),
            ])
        transitions.extend([
            edge(insert, insert, trans['II']),
            edge(insert, match, trans['IM']),
            edge(insert, delete1, trans['ID']),
        ])

    last = str(length - 1)
    tail_insert = "i" + str(length)
    transitions.extend([
        edge("Init", "m0", trans['_M']),
        edge("Init", "i0", trans['_I']),
        edge("Init", "1d0", trans['_D']),
        edge("Init", "End", trans['_E']),
        edge("1d" + last, "m0", trans['DRM']),
        edge("1d" + last, "End", trans['DRE']),
        edge("1d" + last, "i0", trans['DRI']),
        edge("1d" + last, "2d0", trans['DRD']),
        edge("m" + last, "Init", trans['MR_']),
        edge("m" + last, "End", trans['MRE']),
        edge("m" + last, tail_insert, trans['MRI']),
        edge(tail_insert, tail_insert, trans['IRI']),
        edge(tail_insert, "Init", trans['IR_']),
        edge(tail_insert, "End", trans['IRE']),
    ])

    states.append(emitting_state(tail_insert, backgroundProb))
    states.append(zero_duration_state("Init", 1.0, 0.0))
    states.append(zero_duration_state("End", 0.0, 1.0))

    # Drop the last 2d state together with every transition that uses it.
    remstate = '2d' + last
    states = [state for state in states if state.stateName != remstate]
    transitions = [
        tran for tran in transitions
        if tran['to'] != remstate and tran['from'] != remstate
    ]

    hmm = GeneralizedHMM(mathType)
    hmm.load({
        "__name__": "GeneralizedHMM",
        "states": states,
        "transitions": transitions,
    })
    for state in hmm.states:
        state.normalizeTransitions()
    hmm.reorderStatesTopologically()
    return hmm