Exemplo n.º 1
0
    def _analyze_values(self,i,j,tags):
        """Collect the POS-context values for the token pair (i, j).

        ``i`` is expected to be the smaller index into ``tags`` and ``j``
        the larger one.

        Returns the tuple
        ``(si, sj, dt, tpi, tni, tpj, tnj, spi, sni, spj, snj)`` where:

        * ``si`` / ``sj``   -- simplified POS of ``tags[i]`` / ``tags[j]``.
        * ``dt``            -- number of tokens strictly between i and j,
          as a string; gaps of 5-9, 10-14 and 15+ are bucketed to
          ``'5'``, ``'10'`` and ``'15'``.
        * ``tpi`` / ``spi`` -- tag before i and its simplified POS, or
          ``'-S-'`` when i is the first token.
        * ``tnj`` / ``snj`` -- tag after j and its simplified POS, or
          ``'-E-'`` when j is the last token.
        * ``tni`` / ``sni`` and ``tpj`` / ``spj`` -- tag after i and tag
          before j (with simplified POS), or ``'-M-'`` when i and j are
          adjacent.
        """
        si = get_simplified_pos(tags[i])
        sj = get_simplified_pos(tags[j])

        # Bucket the gap.  Thresholds are tested largest-first: testing
        # them in ascending order clamps every gap >= 5 to 5 and leaves
        # the 10 and 15 buckets unreachable.
        gap = abs(i-j)-1
        for bucket in (15,10,5):
            if gap >= bucket:
                gap = bucket
                break
        dt = str(gap)

        # Token preceding i (sentence-start marker at the left edge).
        if i == 0:
            tpi,spi = '-S-','-S-'
        else:
            tpi = tags[i-1]
            spi = get_simplified_pos(tpi)

        # Token following j (sentence-end marker at the right edge).
        if j == len(tags)-1:
            tnj,snj = '-E-','-E-'
        else:
            tnj = tags[j+1]
            snj = get_simplified_pos(tnj)

        # Tokens just inside the pair; adjacent tokens have nothing
        # between them, so a middle marker is used instead.
        if i == j-1:
            tni,sni = '-M-','-M-'
            tpj,spj = '-M-','-M-'
        else:
            tni = tags[i+1]
            sni = get_simplified_pos(tni)
            tpj = tags[j-1]
            spj = get_simplified_pos(tpj)

        return si,sj,dt,tpi,tni,tpj,tnj,spi,sni,spj,snj
Exemplo n.º 2
0
def gen_label_feature(root,child,l_sibling,r_sibling,context,s_len,b_table):
    """Build the labelling features for the edge from ``root`` to ``child``.

    Args:
        root: head node; read through ``get('word')``, ``get('pos')``,
            ``get('role')`` and ``get('snode')``.
        child: dependent node; read through ``get('word')``,
            ``get('pos')``, ``get('args')``, ``get('snode')`` and
            ``getchildren()``.
        l_sibling: left sibling node of ``child``, or None.
        r_sibling: right sibling node of ``child``, or None.
        context: sequence of items whose element 0 is a node id.  The
            first and last items are treated as the outermost siblings.
            An empty sequence leaves all positional flags at ``'No'``.
        s_len: id value that marks the last position; compared against
            the child's id.
        b_table: not used by this function; kept so existing callers
            keep working.

    The node id is the integer before the first ``'_'`` of the
    ``snode`` attribute.

    Returns:
        The result of ``gen_features`` called with the assembled
        feature lists.
    """
    # edges
    h_word,h_pos = root.get('word'),root.get('pos')
    c_word,c_pos = child.get('word'),child.get('pos')
    h_spos,c_spos = get_simplified_pos(h_pos),get_simplified_pos(c_pos)
    h_role = root.get('role')
    c_args = child.get('args')

    # Index the split result directly: map() is not subscriptable when it
    # returns an iterator.
    h_id = int(root.get('snode').split('_')[0])
    c_id = int(child.get('snode').split('_')[0])

    is_first = 'Yes' if c_id == 0 else 'No'
    # NOTE(review): ids look 0-based (first is 0) yet the last position is
    # tested against s_len itself -- confirm what callers pass as s_len.
    is_last = 'Yes' if c_id == s_len else 'No'
    di = 'L' if h_id > c_id else 'R'

    # children of child
    grandchildren = child.getchildren()
    cc_poses = [cc.get('pos') for cc in grandchildren]
    has_prel = 'Yes' if 'prel' in cc_poses else 'No'

    # siblings
    r_word,r_pos,r_spos = 'None','None','None'
    l_word,l_pos,l_spos = 'None','None','None'
    if r_sibling is not None:
        r_word,r_pos = r_sibling.get('word'),r_sibling.get('pos')
        # The simplified POS has to be derived as well; otherwise the
        # sibling spos features below are always 'None'.
        r_spos = get_simplified_pos(r_pos)
    if l_sibling is not None:
        l_word,l_pos = l_sibling.get('word'),l_sibling.get('pos')
        l_spos = get_simplified_pos(l_pos)

    # context: is every position strictly between head and child one of
    # the ids listed in `context`?
    context_id = [item[0] for item in context]
    known_ids = set(context_id)
    lo,hi = (h_id,c_id) if h_id < c_id else (c_id,h_id)
    if all(x in known_ids for x in range(lo+1,hi)):
        between_same_head = 'Yes'
    else:
        between_same_head = 'No'

    # non-local
    mods_of_child = str(len(grandchildren))

    # Split the context ids into those left and right of the head.
    l_ids,r_ids = [],[]
    if context_id:
        first_sibling_id = context_id[0]
        last_sibling_id = context_id[-1]
        if h_id > first_sibling_id and h_id < last_sibling_id:
            for sid in context_id:
                if sid < h_id:
                    l_ids.append(sid)
                else:
                    r_ids.append(sid)
        elif h_id > first_sibling_id and h_id > last_sibling_id:
            l_ids = context_id
        elif h_id < first_sibling_id and h_id < last_sibling_id:
            r_ids = context_id

    # An empty side can never match; testing for emptiness avoids an
    # IndexError when no branch above applied.
    is_left_most = 'Yes' if l_ids and c_id == l_ids[0] else 'No'
    is_right_most = 'Yes' if r_ids and c_id == r_ids[-1] else 'No'
    is_first_left = 'Yes' if l_ids and c_id == l_ids[-1] else 'No'
    is_first_right = 'Yes' if r_ids and c_id == r_ids[0] else 'No'

    edge_features = [
        [h_word,h_pos,c_word,c_pos,is_first,is_last,di],
        [h_word,h_spos,c_word,c_spos,is_first,is_last,di],
        [h_pos,c_pos,is_first,is_last,di],
        [h_spos,c_spos,is_first,is_last,di],
        [h_pos,c_pos,is_first,is_last],
        [h_spos,c_spos,is_first,is_last],
        [h_pos,c_pos,di],
        [h_spos,c_spos,di],
        [h_pos,c_pos],
        [h_spos,c_spos],
    ]

    role_features = [
        [h_pos,c_pos,h_role],
        [h_spos,c_spos,h_role],
        [h_pos,c_pos,di,h_role],
        [h_spos,c_spos,di,h_role],
        [h_word,h_pos,c_word,c_pos,is_first,is_last,di,h_role],
        [h_word,h_spos,c_word,c_spos,is_first,is_last,di,h_role],
        [h_pos,c_pos,is_first,is_last,di,h_role],
        [h_spos,c_spos,is_first,is_last,di,h_role],
        [h_pos,c_pos,is_first,is_last,h_role],
        [h_spos,c_spos,is_first,is_last,h_role],
    ]

    # The argument features are the role features with c_args appended.
    # They are handed to gen_features as ONE tuple argument (ten None
    # placeholders when the child's args are unknown).
    if c_args != 'unknown':
        c_args_features = tuple(f + [c_args] for f in role_features)
    else:
        c_args_features = (None,) * len(role_features)

    prel_features = [
        [c_pos,has_prel],
        [c_spos,has_prel],
        [c_pos,di,has_prel],
        [c_spos,di,has_prel],
    ]

    sibling_features = [
        [l_word,l_pos,r_word,r_pos],
        [l_word,l_spos,r_word,r_spos],
        [l_pos,r_pos],
        [l_spos,r_spos],
    ]

    non_local_features = [
        [mods_of_child,is_right_most,is_left_most,is_first_right,is_first_left],
        [mods_of_child],
        [is_right_most,is_left_most],
        [is_first_right,is_first_left],
        [mods_of_child,is_right_most,is_left_most],
        [mods_of_child,is_first_right,is_first_left],
        [is_right_most,is_left_most,is_first_right,is_first_left],
    ]

    # Positional order matters to gen_features: edge, role, args, prel,
    # sibling, context, non-local.
    features = (edge_features
                + role_features
                + [c_args_features]
                + prel_features
                + sibling_features
                + [between_same_head]  # context feature, passed as a bare string
                + non_local_features)
    return gen_features(*features)