def chooseGibbsLongRangeCRF(crf, block, xs, ys):
    r"""
    Choose a new assignment for every variable in block from the
    conditional distribution p( y_{block} | y_{-block}, xs; \theta).

    All variables in the block receive the same sampled tag, and `ys` is
    updated in place; nothing is returned.

    @param block list int - List of variable indices that should be jointly updated.
    @param xs list string - Observation sequence
    @param ys list string - Tag sequence

    Notes:
    * The block is tied by a hard equality potential, so crf.TAGS is
      iterated once (one candidate tag for the whole block) rather than
      once per block member.
    * Only the potentials touching a block member are used (the block's
      Markov blanket).
    * crf.G(t, y_prev, y_t, xs) is used as the potential linking positions
      t-1 and t, with BEGIN_TAG standing in for the tag before position 0
      (argument order as used throughout this file).
    """
    # Set gives O(1) membership tests; sorting keeps the factors multiplied
    # in ascending position order regardless of how `block` is ordered.
    members = set(block)
    positions = sorted(members)
    length = len(xs)

    weights = []
    for tag in crf.TAGS:
        weight = 1.0
        for t in positions:
            # Potential entering position t: the left neighbour carries the
            # candidate tag when it is also in the block.
            if t == 0:
                left = BEGIN_TAG
            elif t - 1 in members:
                left = tag
            else:
                left = ys[t - 1]
            weight *= crf.G(t, left, tag, xs)
            # Potential leaving position t. When t+1 is in the block this
            # factor is counted as t+1's entering potential instead.
            if t + 1 < length and t + 1 not in members:
                weight *= crf.G(t + 1, tag, ys[t + 1], xs)
        weights.append(weight)

    # Normalize once instead of recomputing the sum for every entry.
    total = sum(weights)
    probs = [weight / total for weight in weights]

    # Given a list, util.multinomial returns the sampled index.
    chosen = crf.TAGS[util.multinomial(probs)]
    for t in block:
        ys[t] = chosen
def chooseGibbsLongRangeCRF(crf, block, xs, ys):
    r"""
    Choose a new assignment for every variable in block from the
    conditional distribution p( y_{block} | y_{-block}, xs; \theta).

    All variables in the block receive the same sampled tag, and `ys` is
    updated in place; nothing is returned.

    @param block list int - List of variable indices that should be jointly
        updated. It may be given in any order and may contain adjacent
        positions.
    @param xs list string - Observation sequence
    @param ys list string - Tag sequence

    Notes:
    * The block is tied by a hard equality potential, so crf.TAGS is
      iterated once (one candidate tag for the whole block) rather than
      once per block member.
    * Only the potentials touching a block member are used (the block's
      Markov blanket).
    * crf.G(t, y_prev, y_t, xs) is used as the potential linking positions
      t-1 and t, with BEGIN_TAG standing in for the tag before position 0
      (argument order as used throughout this file).
    """
    # Set gives O(1) membership tests; sorting removes any dependence on the
    # order in which the caller listed the block.
    members = set(block)
    positions = sorted(members)
    last = len(xs) - 1

    def block_weight(y):
        """Unnormalized weight of assigning tag `y` to the whole block."""
        weight = 1.
        for t in positions:
            # Potential entering position t. If t-1 is also in the block it
            # takes the candidate tag too, so the stale ys[t-1] must not be
            # used there.
            if t == 0:
                left = BEGIN_TAG
            elif t - 1 in members:
                left = y
            else:
                left = ys[t - 1]
            weight *= crf.G(t, left, y, xs)
            # Potential leaving position t towards a neighbour outside the
            # block; in-block neighbours are covered by their own entering
            # potential above.
            if t < last and t + 1 not in members:
                weight *= crf.G(t + 1, y, ys[t + 1], xs)
        return weight

    # compute unnormalized weights, then normalize into a distribution
    weights = [block_weight(y) for y in crf.TAGS]
    norm = sum(weights)

    # Given a dict, util.multinomial returns the sampled key (the tag).
    chosen = util.multinomial(
        {tag: weight / norm for tag, weight in zip(crf.TAGS, weights)})

    # update all the tags in the block
    for t in block:
        ys[t] = chosen
def chooseGibbsCRF(crf, t, xs, ys):
    r"""
    Choose a new assignment for y_t from the conditional distribution
    p( y_t | y_{-t} , xs ; \theta).

    `ys` is updated in place; nothing is returned.

    @param t int - The index of the variable you want to update, y_t.
    @param xs list string - Observation sequence
    @param ys list string - Tag sequence

    Notes:
    * Only the potentials between y_t and its Markov blanket are used: the
      one linking y_{t-1} to y_t and, unless t is the last position, the
      one linking y_t to y_{t+1}.
    * crf.G(t, y_prev, y_t, xs) is used as the potential linking positions
      t-1 and t, with BEGIN_TAG standing in for the tag before position 0
      (argument order as used throughout this file).
    * util.multinomial, given a PDF as a list, returns the sampled index;
      e.g. util.multinomial([0.4, 0.3, 0.2, 0.1]) returns 0 with 40%
      probability and 3 with 10% probability.
    """
    last = len(xs) - 1
    left = BEGIN_TAG if t == 0 else ys[t - 1]

    weights = []
    for tag in crf.TAGS:
        weight = crf.G(t, left, tag, xs)
        # The right neighbour is y_{t+1}. Checking against the last index
        # (rather than special-casing t == 0) also covers a sequence of
        # length one, where position 0 has no right neighbour.
        if t < last:
            weight = weight * crf.G(t + 1, tag, ys[t + 1], xs)
        weights.append(weight)

    # Normalize once instead of recomputing the sum for every entry.
    total = sum(weights)
    probs = [weight / total for weight in weights]
    ys[t] = crf.TAGS[util.multinomial(probs)]
def chooseGibbsCRF(crf, t, xs, ys):
    r"""
    Choose a new assignment for y_t from the conditional distribution
    p( y_t | y_{-t} , xs ; \theta).

    `ys` is updated in place; nothing is returned.

    @param t int - The index of the variable you want to update, y_t.
    @param xs list string - Observation sequence
    @param ys list string - Tag sequence

    Notes:
    * Only the potentials between y_t and its Markov blanket are used: the
      one linking y_{t-1} to y_t and, unless t is the last position, the
      one linking y_t to y_{t+1}.
    * crf.G(t, y_prev, y_t, xs) is used as the potential linking positions
      t-1 and t, with BEGIN_TAG standing in for the tag before position 0
      (argument order as used throughout this file).
    * util.multinomial, given a PDF as a list, returns the sampled index;
      e.g. util.multinomial([0.4, 0.3, 0.2, 0.1]) returns 0 with 40%
      probability and 3 with 10% probability.
    """
    has_right_neighbour = t < len(xs) - 1
    prev_tag = BEGIN_TAG if t == 0 else ys[t - 1]

    scores = []
    for candidate in crf.TAGS:
        score = crf.G(t, prev_tag, candidate, xs)
        if has_right_neighbour:
            # The outgoing potential must be conditioned on the tag to the
            # right, y_{t+1}, not on the left neighbour again.
            score = score * crf.G(t + 1, candidate, ys[t + 1], xs)
        scores.append(score)

    # Hoisted so the normalizer is computed once, not once per tag.
    normalizer = sum(scores)
    distribution = [score / normalizer for score in scores]
    ys[t] = crf.TAGS[util.multinomial(distribution)]
def chooseGibbsCRF(crf, t, xs, ys):
    r"""
    Choose a new assignment for y_t from the conditional distribution
    p( y_t | y_{-t} , xs ; \theta).

    `ys` is updated in place; nothing is returned.

    @param t int - The index of the variable you want to update, y_t.
    @param xs list string - Observation sequence
    @param ys list string - Tag sequence

    Notes:
    * Only the potentials between y_t and its Markov blanket are used: the
      one linking y_{t-1} to y_t and, unless t is the last position, the
      one linking y_t to y_{t+1}.
    * crf.G(t, y_prev, y_t, xs) is used as the potential linking positions
      t-1 and t, with BEGIN_TAG standing in for the tag before position 0
      (argument order as used throughout this file).
    * util.multinomial, given a PDF as a dict, returns the sampled key;
      e.g. util.multinomial({'a':0.4, 'b':0.3, 'c':0.2, 'd':0.1}) returns
      'a' with 40% probability and 'd' with 10% probability.
    """
    left = BEGIN_TAG if t == 0 else ys[t - 1]

    # compute partial weights
    if t < len(xs) - 1:
        weights = [crf.G(t, left, y, xs) * crf.G(t + 1, y, ys[t + 1], xs)
                   for y in crf.TAGS]
    else:
        # Last position: there is no y_{t+1}. This also covers t == 0 on a
        # sequence of length one, which must not index ys[1].
        weights = [crf.G(t, left, y, xs) for y in crf.TAGS]

    norm = sum(weights)
    ys[t] = util.multinomial(
        {y: weight / norm for y, weight in zip(crf.TAGS, weights)})
def chooseGibbsLongRangeCRF(crf, block, xs, ys):
    r"""
    Choose a new assignment for every variable in block from the
    conditional distribution p( y_{block} | y_{-block}, xs; \theta).

    All variables in the block receive the same sampled tag, and `ys` is
    updated in place; nothing is returned.

    @param block list int - List of variable indices that should be jointly updated.
    @param xs list string - Observation sequence
    @param ys list string - Tag sequence

    Notes:
    * The block is tied by a hard equality potential, so crf.TAGS is
      iterated once (one candidate tag for the whole block) rather than
      once per block member.
    * Only the potentials touching a block member are used (the block's
      Markov blanket).
    * crf.G(t, y_prev, y_t, xs) is used as the potential linking positions
      t-1 and t, with BEGIN_TAG standing in for the tag before position 0
      (argument order as used throughout this file).
    """
    # Set membership is O(1); the original list scans were repeated for
    # every position of the sequence and every tag.
    in_block = set(block)
    ordered = sorted(in_block)
    seq_len = len(xs)

    def weight_for(tag):
        """Product of the potentials touching the block when it is `tag`."""
        product = 1.0
        for pos in ordered:
            # Entering potential: a left neighbour inside the block shares
            # the candidate tag.
            if pos == 0:
                before = BEGIN_TAG
            elif pos - 1 in in_block:
                before = tag
            else:
                before = ys[pos - 1]
            product *= crf.G(pos, before, tag, xs)
            # Leaving potential towards a neighbour outside the block;
            # in-block neighbours are handled by their own entering factor.
            nxt = pos + 1
            if nxt < seq_len and nxt not in in_block:
                product *= crf.G(nxt, tag, ys[nxt], xs)
        return product

    weights = [weight_for(tag) for tag in crf.TAGS]

    # Normalize once instead of recomputing the sum for every entry.
    total = sum(weights)
    probs = [weight / total for weight in weights]

    # Given a list, util.multinomial returns the sampled index.
    sampled = crf.TAGS[util.multinomial(probs)]
    for t in block:
        ys[t] = sampled