예제 #1
0
파일: ner.py 프로젝트: Pei-jie/221-project
def chooseGibbsLongRangeCRF(crf, block, xs, ys ):
    r"""
    Choose a new assignment for every variable in block from the
    conditional distribution p( y_{block} | y_{-block}, xs; \theta).

    Every position in `block` receives the same sampled tag. For each
    candidate tag the unnormalized weight is the product of the factors
    crf.G(i, y_{i-1}, y_i, xs) that touch at least one block position;
    factors that involve no block position are the same for every candidate
    and are skipped.

    @param crf - Model object; this function reads crf.TAGS and calls
                 crf.G(i, previous_tag, current_tag, xs).
    @param block list int - List of variable indices that should be jointly updated.
    @param xs list string - Observation sequence
    @param ys list string - Tag sequence; updated in place.

    Nothing is returned.
    """
    # Set membership keeps the per-position checks O(1) instead of scanning
    # the block list for every position and every tag.
    members = set(block)

    # Factor i links y_{i-1} and y_i, so it depends on the block exactly when
    # one of those two positions is a member. This list does not depend on
    # the candidate tag, so it is computed once. Ascending order is kept so
    # the products are accumulated in the same order for every tag.
    touched = [i for i in range(1, len(xs))
               if i in members or (i - 1) in members]
    starts_at_zero = 0 in members

    weights = []
    for tag in crf.TAGS:
        # Float accumulator so the normalization below is a true division
        # even when no factor is multiplied in.
        weight = 1.0
        if starts_at_zero:
            weight *= crf.G(0, BEGIN_TAG, tag, xs)
        for i in touched:
            # Block members take the candidate tag; everything else keeps
            # its current assignment.
            prev_tag = tag if (i - 1) in members else ys[i - 1]
            cur_tag = tag if i in members else ys[i]
            weight *= crf.G(i, prev_tag, cur_tag, xs)
        weights.append(weight)

    # Normalize with a single sum rather than re-summing per element.
    total = sum(weights)
    probs = [w / total for w in weights]

    # Given a list, util.multinomial yields an index into that list.
    tag = crf.TAGS[util.multinomial(probs)]
    for t in block:
        ys[t] = tag
예제 #2
0
def chooseGibbsLongRangeCRF(crf, block, xs, ys ):
    r"""
    Choose a new assignment for every variable in block from the
    conditional distribution p( y_{block} | y_{-block}, xs; \theta).

    Every position in `block` receives the same sampled tag. The weight of a
    candidate tag is the product of the factors crf.G(t, y_{t-1}, y_t, xs)
    that touch a block position: the factor arriving at each block position
    and the factor leaving it towards a non-block successor.

    @param crf - Model object; this function reads crf.TAGS and calls
                 crf.G(t, previous_tag, current_tag, xs).
    @param block list int - List of variable indices that should be jointly
                 updated. It may be given in any order; duplicates are ignored.
    @param xs list string - Observation sequence
    @param ys list string - Tag sequence; updated in place.

    Nothing is returned. An empty block leaves `ys` untouched.
    """
    if not block:
        # Nothing to resample; avoids indexing into an empty block.
        return

    members = set(block)
    positions = sorted(members)
    last_index = len(xs) - 1

    def computeBlockWeight(y):
        weight = 1.
        for t in positions:
            # Incoming factor. When the predecessor is itself in the block it
            # is being assigned `y` as well, so the stale value in `ys` must
            # not be used for it.
            if t == 0:
                previous = BEGIN_TAG
            elif (t - 1) in members:
                previous = y
            else:
                previous = ys[t - 1]
            weight *= crf.G(t, previous, y, xs)

            # Outgoing factor, only when the successor exists and is not a
            # block member (a member's incoming factor already covers it).
            if t < last_index and (t + 1) not in members:
                weight *= crf.G(t + 1, y, ys[t + 1], xs)
        return weight

    # compute probabilities/weights
    weights = [computeBlockWeight(y) for y in crf.TAGS]
    norm = sum(weights)

    # randomly choose a tag; given a dict, util.multinomial yields a key
    y = util.multinomial({ y : weight/norm for y, weight in zip(crf.TAGS, weights) })

    # update all the tags in the block
    for t in block:
        ys[t] = y
예제 #3
0
파일: ner.py 프로젝트: Pei-jie/221-project
def chooseGibbsCRF(crf, t, xs, ys ):
    r"""
    Choose a new assignment for y_t from the conditional distribution
    p( y_t | y_{-t} , xs ; \theta).

    Only the two factors that involve y_t are used: the one linking it to
    its predecessor (BEGIN_TAG at t == 0) and, when t is not the last
    position, the one linking it to its successor y_{t+1}.

    @param crf - Model object; this function reads crf.TAGS and calls
                 crf.G(t, previous_tag, current_tag, xs).
    @param t int - The index of the variable you want to update, y_t.
    @param xs list string - Observation sequence
    @param ys list string - Tag sequence; ys[t] is overwritten in place.

    Nothing is returned.

    Possibly useful:
    - crf.G
    - util.multinomial: Given a PDF as a list OR counter, util.multinomial draws
      a sample from this distribution; for example,
      util.multinomial([0.4, 0.3, 0.2, 0.1]) will return 0 with 40%
      probability and 3 with 10% probability.
      Alternatively you could use,
      util.multinomial({'a':0.4, 'b':0.3, 'c':0.2, 'd':0.1}) will return 'a' with 40%
      probability and 'd' with 10% probability.
    """
    # Left neighbour: the start symbol for the first position.
    prev_tag = BEGIN_TAG if t == 0 else ys[t - 1]
    # Right neighbour exists only before the last position; this also covers
    # a one-token sequence, where t == 0 is the last position too.
    has_next = t + 1 < len(xs)

    weights = []
    for tag in crf.TAGS:
        weight = crf.G(t, prev_tag, tag, xs)
        if has_next:
            # The outgoing factor pairs the candidate with the *next* tag.
            weight *= crf.G(t + 1, tag, ys[t + 1], xs)
        weights.append(weight)

    # Normalize with a single sum rather than re-summing per element.
    total = sum(weights)
    probs = [w / total for w in weights]

    # Given a list, util.multinomial yields an index into that list.
    ys[t] = crf.TAGS[util.multinomial(probs)]
예제 #4
0
def chooseGibbsCRF(crf, t, xs, ys):
    r"""
    Choose a new assignment for y_t from the conditional distribution
    p( y_t | y_{-t} , xs ; \theta).

    The weight of each candidate tag is the product of the factors that
    involve y_t: crf.G(t, y_{t-1}, y_t, xs), with BEGIN_TAG standing in for
    y_{-1}, and crf.G(t + 1, y_t, y_{t+1}, xs) whenever position t + 1
    exists.

    @param crf - Model object; this function reads crf.TAGS and calls
                 crf.G(t, previous_tag, current_tag, xs).
    @param t int - The index of the variable you want to update, y_t.
    @param xs list string - Observation sequence
    @param ys list string - Tag sequence; ys[t] is overwritten in place.

    Nothing is returned.

    Possibly useful:
    - crf.G
    - util.multinomial: Given a PDF as a list OR counter, util.multinomial draws
      a sample from this distribution; for example,
      util.multinomial([0.4, 0.3, 0.2, 0.1]) will return 0 with 40%
      probability and 3 with 10% probability.
      Alternatively you could use,
      util.multinomial({'a':0.4, 'b':0.3, 'c':0.2, 'd':0.1}) will return 'a' with 40%
      probability and 'd' with 10% probability.
    """
    is_first = t == 0
    is_last = t >= len(xs) - 1

    def candidate_weight(tag):
        # Incoming factor from the left neighbour (or the start symbol).
        incoming = crf.G(t, BEGIN_TAG if is_first else ys[t - 1], tag, xs)
        if is_last:
            # No right neighbour; this also covers a one-token sequence.
            return incoming
        # Outgoing factor towards the right neighbour y_{t+1}.
        return incoming * crf.G(t + 1, tag, ys[t + 1], xs)

    weights = [candidate_weight(tag) for tag in crf.TAGS]

    # Sum once, then normalize.
    total = sum(weights)
    probs = [w / total for w in weights]

    # Given a list, util.multinomial yields an index into that list.
    ys[t] = crf.TAGS[util.multinomial(probs)]
예제 #5
0
def chooseGibbsCRF(crf, t, xs, ys ):
    r"""
    Choose a new assignment for y_t from the conditional distribution
    p( y_t | y_{-t} , xs ; \theta).

    Only the factors that involve y_t are used: the one linking it to its
    predecessor (BEGIN_TAG at t == 0) and, when position t + 1 exists, the
    one linking it to its successor.

    @param crf - Model object; this function reads crf.TAGS and calls
                 crf.G(t, previous_tag, current_tag, xs).
    @param t int - The index of the variable you want to update, y_t.
    @param xs list string - Observation sequence
    @param ys list string - Tag sequence; ys[t] is overwritten in place.

    Nothing is returned.

    Possibly useful:
    - crf.G
    - util.multinomial: Given a PDF as a list OR counter, util.multinomial draws
      a sample from this distribution; for example,
      util.multinomial([0.4, 0.3, 0.2, 0.1]) will return 0 with 40%
      probability and 3 with 10% probability.
      Alternatively you could use,
      util.multinomial({'a':0.4, 'b':0.3, 'c':0.2, 'd':0.1}) will return 'a' with 40%
      probability and 'd' with 10% probability.
    """
    # Left neighbour: the start symbol for the first position.
    prev_tag = BEGIN_TAG if t == 0 else ys[t-1]

    # compute partial weights
    if t + 1 < len(xs):
        next_tag = ys[t+1]
        weights = [crf.G(t, prev_tag, y, xs) * crf.G(t+1, y, next_tag, xs) for y in crf.TAGS]
    else:
        # Last position (including t == 0 in a one-token sequence): there is
        # no successor, so only the incoming factor applies.
        weights = [crf.G(t, prev_tag, y, xs) for y in crf.TAGS]

    norm = sum(weights)

    # Given a dict, util.multinomial yields one of its keys, i.e. a tag.
    ys[t] = util.multinomial({ y : weight/norm for y, weight in zip(crf.TAGS, weights) })
예제 #6
0
def chooseGibbsLongRangeCRF(crf, block, xs, ys):
    r"""
    Choose a new assignment for every variable in block from the
    conditional distribution p( y_{block} | y_{-block}, xs; \theta).

    All block positions are assigned one shared tag. A candidate tag is
    weighted by the product of every factor crf.G(i, y_{i-1}, y_i, xs) in
    which y_{i-1} or y_i is a block position; the remaining factors are
    identical for all candidates and are left out.

    @param crf - Model object; this function reads crf.TAGS and calls
                 crf.G(i, previous_tag, current_tag, xs).
    @param block list int - List of variable indices that should be jointly updated.
    @param xs list string - Observation sequence
    @param ys list string - Tag sequence; updated in place.

    Nothing is returned.
    """
    # A set gives O(1) membership tests; the list form would be rescanned
    # for every position and every tag.
    in_block = set(block)

    # Indices of the factors that touch the block, in ascending order.
    # Independent of the candidate tag, hence computed once up front.
    factor_indices = [i for i in range(1, len(xs))
                      if i in in_block or (i - 1) in in_block]
    includes_first = 0 in in_block

    def weight_for(tag):
        # Float accumulator so normalization is a true division even when
        # no factor applies.
        weight = 1.0
        if includes_first:
            weight *= crf.G(0, BEGIN_TAG, tag, xs)
        for i in factor_indices:
            # Block members take the candidate tag; other positions keep
            # their current assignment.
            left = tag if (i - 1) in in_block else ys[i - 1]
            right = tag if i in in_block else ys[i]
            weight *= crf.G(i, left, right, xs)
        return weight

    weights = [weight_for(tag) for tag in crf.TAGS]

    # Sum once, then normalize.
    total = sum(weights)
    probs = [w / total for w in weights]

    # Given a list, util.multinomial yields an index into that list.
    tag = crf.TAGS[util.multinomial(probs)]
    for t in block:
        ys[t] = tag