Example #1
def extract(stack, queue, state, feature_names, sentence):
    """
     Extract the features from one sentence
     returns X and y, where X is a list of dictionaries and
     y is a list of symbols
     :param sentence:
     :param w_size:
     :return:
     """
    x = []
    tmpListStack = ['nil', 'nil', 'nil', 'nil']
    tmpListQueue = ['nil', 'nil', 'nil', 'nil']
    try:
        tmpListStack[0] = stack[0]['form']
        tmpListStack[1] = stack[0]['postag']
        tmpListStack[2] = stack[1]['form']
        tmpListStack[3] = stack[1]['postag']
    except IndexError:
        pass
    try:
        tmpListQueue[0] = queue[0]['form']
        tmpListQueue[1] = queue[0]['postag']
        tmpListQueue[2] = queue[1]['form']
        tmpListQueue[3] = queue[1]['postag']
    except IndexError:
        pass
    x.extend(tmpListStack)
    x.extend(tmpListQueue)
    x.append(transition.can_rightarc(stack))
    x.append(transition.can_reduce(stack, state))
    # We represent the feature vector as a dictionary
    # The classes are stored in a list
    #y.append(padded_sentence[i + w_size][2])
    return dict(zip(feature_names, x))
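extract here returns one dictionary per parser state, keyed by feature_names. A minimal sketch, assuming scikit-learn's DictVectorizer (the usual companion for such dicts in these assignments) and illustrative variable names, of turning a list of them into a training matrix:

from sklearn.feature_extraction import DictVectorizer

# X_dicts: list of feature dicts from extract(); y: parallel list of transition labels
vec = DictVectorizer(sparse=True)
X_train = vec.fit_transform(X_dicts)  # one-hot encodes the symbolic features
# X_train and y can now be passed to any scikit-learn classifier's fit()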
Example #2
File: dparser.py Project: pnugues/ilppp
def reference(stack, queue, graph):
    """
    Gold standard parsing
    Produces a sequence of transitions from a manually-annotated corpus:
    sh, re, ra.deprel, la.deprel
    :param stack: The stack
    :param queue: The input list
    :param graph: The set of relations already parsed
    :return: the transition and the grammatical function (deprel) in the
    form of transition.deprel
    """
    # Right arc
    if stack and stack[0]['id'] == queue[0]['head']:
        # print('ra', queue[0]['deprel'], stack[0]['cpostag'], queue[0]['cpostag'])
        deprel = '.' + queue[0]['deprel']
        stack, queue, graph = transition.right_arc(stack, queue, graph)
        return stack, queue, graph, 'ra' + deprel
    # Left arc
    if stack and queue[0]['id'] == stack[0]['head']:
        # print('la', stack[0]['deprel'], stack[0]['cpostag'], queue[0]['cpostag'])
        deprel = '.' + stack[0]['deprel']
        stack, queue, graph = transition.left_arc(stack, queue, graph)
        return stack, queue, graph, 'la' + deprel
    # Reduce
    if stack and transition.can_reduce(stack, graph):
        for word in stack:
            if (word['id'] == queue[0]['head'] or
                        word['head'] == queue[0]['id']):
                # print('re', stack[0]['cpostag'], queue[0]['cpostag'])
                stack, queue, graph = transition.reduce(stack, queue, graph)
                return stack, queue, graph, 're'
    # Shift
    # print('sh', [], queue[0]['cpostag'])
    stack, queue, graph = transition.shift(stack, queue, graph)
    return stack, queue, graph, 'sh'
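A minimal sketch (not part of the original file) of driving this oracle over one sentence to obtain the gold-standard transition sequence; it follows the graph initialization used in the extract_features_sent examples further down, and oracle_transitions is an illustrative name:

def oracle_transitions(sentence):
    # Initial configuration: empty stack, all words in the queue, root arc in the graph
    stack = []
    queue = list(sentence)
    graph = {'heads': {'0': '0'}, 'deprels': {'0': 'ROOT'}}
    transitions = []
    while queue:
        stack, queue, graph, trans = reference(stack, queue, graph)
        transitions.append(trans)
    # Empty the remaining stack, as done at the end of the parsing examples below
    stack, graph = transition.empty_stack(stack, graph)
    return transitions, graph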
Example #3
def line_extract(stack, queue, graph, feature_names, sentence, samples,
                 special):
    x = []
    structures = [stack, queue]
    elements = ['postag', 'form']
    for structure in structures:
        for element in elements:
            for i in range(samples):
                if len(structure) > i:
                    x.append(structure[i][element])
                else:
                    x.append('nil')
    if special:
        # word before and after top of stack
        for element in elements:
            for i in [-1, 1]:
                if len(stack) > 0:
                    index = int(stack[0]['id']) + i
                    if 0 <= index < len(sentence):
                        x.append(sentence[index][element])
                    else:
                        x.append('nil')
                else:
                    x.append('nil')

    x.append(transition.can_reduce(stack, graph))
    x.append(transition.can_leftarc(stack, graph))
    return dict(zip(feature_names, x))
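The dict(zip(feature_names, x)) at the end assumes feature_names follows exactly the loop order above. A sketch of a matching name generator; make_feature_names and the name pattern are illustrative, not part of the original file:

def make_feature_names(samples, special):
    names = []
    for structure in ['stack', 'queue']:
        for element in ['postag', 'form']:
            for i in range(samples):
                names.append('%s%d_%s' % (structure, i, element))
    if special:
        # words before and after the top of the stack, in the same order as above
        for element in ['postag', 'form']:
            for i in [-1, 1]:
                names.append('stack0%+d_%s' % (i, element))
    names += ['can_re', 'can_la']
    return names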
Example #4
def reference(stack, queue, graph):
    """
    Gold standard parsing
    Produces a sequence of transitions from a manually-annotated corpus:
    sh, re, ra.deprel, la.deprel
    :param stack: The stack
    :param queue: The input list
    :param graph: The set of relations already parsed
    :return: the transition and the grammatical function (deprel) in the
    form of transition.deprel
    """
    # Right arc
    if stack and stack[0]['id'] == queue[0]['head']:
        # print('ra', queue[0]['deprel'], stack[0]['cpostag'], queue[0]['cpostag'])
        deprel = '.' + queue[0]['deprel']
        stack, queue, graph = transition.right_arc(stack, queue, graph)
        return stack, queue, graph, 'ra' + deprel
    # Left arc
    if stack and queue[0]['id'] == stack[0]['head']:
        # print('la', stack[0]['deprel'], stack[0]['cpostag'], queue[0]['cpostag'])
        deprel = '.' + stack[0]['deprel']
        stack, queue, graph = transition.left_arc(stack, queue, graph)
        return stack, queue, graph, 'la' + deprel
    # Reduce
    if stack and transition.can_reduce(stack, graph):
        for word in stack:
            if (word['id'] == queue[0]['head'] or
                        word['head'] == queue[0]['id']):
                # print('re', stack[0]['cpostag'], queue[0]['cpostag'])
                stack, queue, graph = transition.reduce(stack, queue, graph)
                return stack, queue, graph, 're'
    # Shift
    # print('sh', [], queue[0]['cpostag'])
    stack, queue, graph = transition.shift(stack, queue, graph)
    return stack, queue, graph, 'sh'
Example #5
def extract2(stack, queue, graph, feature_names, sentence):
    features = {}
    feat_vec = ['', '', '', '', '', '', '', '', '', '']
    if not stack:
        feat_vec[0] = feat_vec[1] = feat_vec[2] = feat_vec[3] = 'nil'
    elif len(stack) < 2:
        feat_vec[1] = feat_vec[3] = 'nil'
        feat_vec[0] = stack[0]['postag']
        feat_vec[2] = stack[0]['form']
    else:
        feat_vec[0] = stack[0]['postag']
        feat_vec[1] = stack[1]['postag']
        feat_vec[2] = stack[0]['form']
        feat_vec[3] = stack[1]['form']

    if not queue:
        feat_vec[4] = feat_vec[5] = feat_vec[6] = feat_vec[7] = 'nil'
    elif len(queue) < 2:
        feat_vec[4] = queue[0]['postag']
        feat_vec[6] = queue[0]['form']
        feat_vec[5] = feat_vec[7] = 'nil'
    else:
        feat_vec[4] = queue[0]['postag']
        feat_vec[5] = queue[1]['postag']
        feat_vec[6] = queue[0]['form']
        feat_vec[7] = queue[1]['form']

    feat_vec[8] = transition.can_reduce(stack, graph)
    feat_vec[9] = transition.can_leftarc(stack, graph)

    return dict(zip(feature_names, feat_vec))
Example #6
def extract_2(stack, queue, graph, feature_names, sentence):

    stack_list = ["nil", "nil", "nil", "nil"]
    if stack:
        stack_list[0] = stack[0]['form']
        stack_list[1] = stack[0]['postag']
        if len(stack) > 1:
            stack_list[2] = stack[1]['form']
            stack_list[3] = stack[1]['postag']
    queue_list = ["nil", "nil", "nil", "nil"]
    if queue:
        queue_list[0] = queue[0]['form']
        queue_list[1] = queue[0]['postag']
        if len(queue) > 1:
            queue_list[2] = queue[1]['form']
            queue_list[3] = queue[1]['postag']
    features = stack_list + queue_list
    can_re = transition.can_reduce(stack, graph)
    can_left_arc = transition.can_leftarc(stack, graph)
    features.append(can_re)
    features.append(can_left_arc)
    features = zip(feature_names, features)
    features = dict(features)
    # print(features)
    return features
Example #7
def extract(stack, queue, graph, feature_names, sentence):
    # X contains one dict for each word, with each feature as a key in the dict
    # x is a row in X
    x = list()

    p_stack = stack + [{'id': '-1', 'form': 'nil', 'postag': 'nil'}]*2
    p_queue = queue + [{'form': 'nil', 'postag': 'nil'}]*2
    p_sentence = [{'form': 'nil', 'postag': 'nil'}, {'form': 'BOS', 'postag': 'BOS'}] + sentence[1:] + [{'form': 'EOS', 'postag': 'EOS'}]
    # 1st feature set
    x.append(transition.can_reduce(stack, graph))
    x.append(transition.can_leftarc(stack, graph))
    x.append(p_stack[0]['postag'])
    x.append(p_stack[0]['form'])
    x.append(p_queue[0]['postag'])
    x.append(p_queue[0]['form'])

    # 2nd feature set
    x.append(p_stack[1]['postag'])
    x.append(p_stack[1]['form'])
    x.append(p_queue[1]['postag'])
    x.append(p_queue[1]['form'])

    # 3rd feature set
    i = int(p_stack[0]['id'])
    if i == -1:
        x += ['nil']*4
    else:
        w_pre, w_next = p_sentence[i], p_sentence[i + 2]
        x.append(w_pre['postag'])
        x.append(w_pre['form'])
        x.append(w_next['postag'])
        x.append(w_next['form'])


    return dict(zip(feature_names, x))
Example #8
def extract(stack, queue, graph, feature_names, sentence):
    full_sentence = sentence
    features = []

    try:
        features.append(stack[0]['postag'])
        features.append(stack[0]['form'])
    except IndexError:
        features.append("nil")
        features.append("nil")

    features.append(queue[0]['postag'])
    features.append(queue[0]['form'])
    if transition.can_leftarc(stack, graph):
        can_la = True
    else:
        can_la = False
    if transition.can_reduce(stack, graph):
        can_re = True
    else:
        can_re = False
    features.append(can_re)
    features.append(can_la)

    return features
Example #9
def generate_feature_vector2(stack,queue,graph):
  feature_names = ['stack0_POS','stack1_POS','stack0_word','stack1_word',
  'queue0_POS','queue1_POS','queue0_word','queue1_word','can-re','can-la']
  try:
    stack0_POS = stack[0]['postag']
  except IndexError:
    stack0_POS = 'nil'
  try:
    stack1_POS = stack[1]['postag']
  except IndexError:
    stack1_POS = 'nil'
  try:
    stack0_word = stack[0]['form']
  except IndexError:
    stack0_word = 'nil'
  try:
    stack1_word = stack[1]['form']
  except IndexError:
    stack1_word = 'nil'
  #Guaranteed to exist because of while loop
  queue0_POS = queue[0]['postag'] 
  queue0_word= queue[0]['form']
  try:
    queue1_POS = queue[1]['postag']
  except IndexError:
      queue1_POS = 'nil'
  try:
    queue1_word= queue[1]['form']
  except IndexError:
      queue1_word = 'nil'
  can_left_arc = transition.can_leftarc(stack,graph)
  can_reduce = transition.can_reduce(stack,graph)
  return dict(zip(feature_names, [stack0_POS,stack1_POS,stack0_word,stack1_word,queue0_POS,queue1_POS,queue0_word,queue1_word,can_reduce,can_left_arc]))
Example #10
def extract_features_sent(sentence, feature_names):
    """
    Extract the features from one sentence
    returns X and y, where X is a list of dictionaries and
    y is a list of symbols
    :param sentence:
    :param feature_names:
    :return:
    """
    #sentence  = sentence.splitlines()

    stack = []
    graph = {}
    queue = list(sentence)
    graph['heads'] = {}
    graph['heads']['0'] = '0'
    graph['deprels'] = {}
    graph['deprels']['0'] = 'ROOT'

    transitions = []

    x = list()
    X = list()
    y = list()
    while queue:
        if (len(stack) > 0):
            x.append(stack[0]['cpostag'])
            x.append(stack[0]['form'])
        else:
            x.append('nil')
            x.append('nil')

        if (queue):
            x.append(queue[0]['cpostag'])
            x.append(queue[0]['form'])
        else:
            x.append('nil')
            x.append('nil')

        x.append(transition.can_reduce(stack, graph))
        x.append(transition.can_leftarc(stack, graph))

        X.append(dict(zip(feature_names, x)))
        #remove reference, predict what action should be done(equiv to trans)
        stack, queue, graph, trans = dparser.reference(stack, queue, graph)
        y.append(trans)
        x = list()
    #stack, graph = transition.empty_stack(stack, graph)

    #for word in queue:
    #print(word['form'])
    #stack, queue, graph, trans = reference(stack, queue, graph)
    #transitions.append(trans)
    # stack, graph = transition.empty_stack(stack, graph)
    return X, y
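A minimal sketch of how the X, y pairs produced by extract_features_sent can be turned into a transition classifier. The corpus variable and the choice of LogisticRegression are illustrative assumptions; the parsing examples further down only assume some fitted classifier and vectorizer vec:

from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LogisticRegression

X_dicts, y = [], []
for sentence in formatted_corpus:  # formatted_corpus: list of CoNLL sentences (assumed)
    X_sent, y_sent = extract_features_sent(sentence, feature_names)
    X_dicts.extend(X_sent)
    y.extend(y_sent)

vec = DictVectorizer(sparse=True)
classifier = LogisticRegression()
classifier.fit(vec.fit_transform(X_dicts), y)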
Example #11
def parse_ml(stack, queue, graph, trans):
    if stack and trans[:2] == 'ra' and transition.can_rightarc(stack):
        stack, queue, graph = transition.right_arc(stack, queue, graph, trans[3:])
        return stack, queue, graph, 'ra'
    if stack and trans[:2] == 'la' and transition.can_leftarc(stack, graph):
        stack, queue, graph = transition.left_arc(stack, queue, graph, trans[3:])
        return stack, queue, graph, 'la'
    if stack and trans[:2] == 're' and transition.can_reduce(stack, graph):
        stack, queue, graph = transition.reduce(stack, queue, graph)
        return stack, queue, graph, 're'
    stack, queue, graph = transition.shift(stack, queue, graph)
    return stack, queue, graph, 'sh'
Example #12
File: dparser.py Project: KarlJoad/ilppp
def reference(stack, queue, graph):
    """
    Gold standard parsing
    Produces a sequence of transitions from a manually-annotated corpus:
    sh, re, ra.deprel, la.deprel
    :param stack: The stack
    :param queue: The input list
    :param graph: The set of relations already parsed
    :return: the transition and the grammatical function (deprel) in the
    form of transition.deprel
    """
    # This is a sequence of if statements, but each branch returns, so it behaves like
    # a chain of elif statements. The operations are tried in priority order:
    # 1) Right-arc, 2) Left-arc, 3) Reduce, 4) Shift
    # Right arc
    if stack and stack[0]['id'] == queue[0]['head']:
        # If the stack is non-empty and
        # the id of the top of the stack equals the head of the front of the queue,
        # then we do a right-arc
        # print('ra', queue[0]['deprel'], stack[0]['cpostag'], queue[0]['cpostag'])
        deprel = '.' + queue[0]['deprel']
        stack, queue, graph = transition.right_arc(stack, queue, graph)
        return stack, queue, graph, 'ra' + deprel
    # Left arc
    if stack and queue[0]['id'] == stack[0]['head']:
        # If the stack is non-null AND
        # the front of the queue's ID is the same as the top of the stack's head value
        # Then we do a left-arc
        # print('la', stack[0]['deprel'], stack[0]['cpostag'], queue[0]['cpostag'])
        deprel = '.' + stack[0]['deprel']
        stack, queue, graph = transition.left_arc(stack, queue, graph)
        return stack, queue, graph, 'la' + deprel
    # Reduce
    if stack and transition.can_reduce(stack, graph):
        # If the stack is non-null and the stack can be reduced, then
        for word in stack:
            # For each word present in the stack
            if (word['id'] == queue[0]['head'] or
                    # If the word's ID is the same as the front of the queue's HEAD value
                    word['head'] == queue[0]['id']):
                # OR the word's HEAD value matches the front of the queue's ID value
                # print('re', stack[0]['cpostag'], queue[0]['cpostag'])
                # Then we can reduce the value that is on the stack
                stack, queue, graph = transition.reduce(stack, queue, graph)
                return stack, queue, graph, 're'
    # Shift
    # We only get here if none of the other operations occurred
    # This is because each of the if statements for the operations has a return statement
    # Shift pushes the front of the queue onto the stack and leaves the graph unmodified
    stack, queue, graph = transition.shift(stack, queue, graph)
    return stack, queue, graph, 'sh'
Example #13
def extract3(stack, queue, graph, feature_names, sentence):
    features = {}
    feat_vec = ['', '', '', '', '', '', '', '', '', '', '', '', '', '']
    if not stack:
        feat_vec[0] = feat_vec[1] = feat_vec[2] = feat_vec[3] = 'nil'
    elif len(stack) < 2:
        feat_vec[1] = feat_vec[3] = 'nil'
        feat_vec[0] = stack[0]['postag']
        feat_vec[2] = stack[0]['form']
    else:
        feat_vec[0] = stack[0]['postag']
        feat_vec[1] = stack[1]['postag']
        feat_vec[2] = stack[0]['form']
        feat_vec[3] = stack[1]['form']

    if not queue:
        feat_vec[4] = feat_vec[5] = feat_vec[6] = feat_vec[7] = 'nil'
    elif len(queue) < 2:
        feat_vec[4] = queue[0]['postag']
        feat_vec[6] = queue[0]['form']
        feat_vec[5] = feat_vec[7] = 'nil'
    else:
        feat_vec[4] = queue[0]['postag']
        feat_vec[5] = queue[1]['postag']
        feat_vec[6] = queue[0]['form']
        feat_vec[7] = queue[1]['form']

    feat_vec[8] = transition.can_reduce(stack, graph)
    feat_vec[9] = transition.can_leftarc(stack, graph)

    #before 10, 11
    #after 12,13

    if not stack:
        feat_vec[10] = feat_vec[11] = feat_vec[12] = feat_vec[13] = 'nil'
    else:
        st_id = stack[0]['id']
        if int(st_id) == 0:
            feat_vec[10] = feat_vec[11] = 'nil'
        else:
            feat_vec[10] = sentence[int(st_id) - 1]['postag']
            feat_vec[11] = sentence[int(st_id) - 1]['form']

        if int(st_id) + 1 >= len(sentence):
            feat_vec[12] = feat_vec[13] = 'nil'
        else:
            feat_vec[12] = sentence[int(st_id) + 1]['postag']
            feat_vec[13] = sentence[int(st_id) + 1]['form']

    return dict(zip(feature_names, feat_vec))
Example #14
def extract(stack, queue, graph, feature_names, sentence):
    X = []
    X.append(transition.can_leftarc(stack, graph))
    X.append(transition.can_reduce(stack, graph))
    try:
        X.append(stack[0]['postag'])
        X.append(stack[0]['form'])
    except IndexError:
        X.append("nil")
        X.append("nil")
    try:
        X.append(stack[1]['postag'])
        X.append(stack[1]['form'])
    except IndexError:
        X.append("nil")
        X.append("nil")
    X1 = X[:]  # copy of the first six features
    try:
        X.append(queue[0]['postag'])
        X.append(queue[0]['form'])
    except IndexError:
        X.append("nil")
        X.append("nil")
    try:
        X.append(queue[1]['postag'])
        X.append(queue[1]['form'])
    except IndexError:
        X.append("nil")
        X.append("nil")
    X2 = X[:]  # copy of the first ten features
    try:
        # word following the top of the stack in the sentence
        next_index = int(stack[0]['id']) + 1
        X.append(sentence[next_index]['postag'])
        X.append(sentence[next_index]['form'])
    except IndexError:
        X.append("nil")
        X.append("nil")

    try:
        X.append(sentence[int(stack[1]['head'])]['postag'])
        X.append(sentence[int(stack[1]['head'])]['form'])
    except:
        X.append("nil")
        X.append("nil")
    X3 = X
    X1 = dict(zip(feature_names[:6], X1))
    X2 = dict(zip(feature_names[:10], X2))
    X3 = dict(zip(feature_names, X3))
    return X1, X2, X3
Example #15
def extract_features_sent(sentence, feature_names, classifier, dict_classes,
                          vec):

    stack = []
    graph = {}
    queue = list(sentence)
    graph['heads'] = {}
    graph['heads']['0'] = '0'
    graph['deprels'] = {}
    graph['deprels']['0'] = 'ROOT'
    transitions = []

    x = list()
    X = list()
    y = list()
    while queue:
        if (len(stack) > 0):
            x.append(stack[0]['cpostag'])
            x.append(stack[0]['form'])
        else:
            x.append('nil')
            x.append('nil')

        if (queue):
            x.append(queue[0]['cpostag'])
            x.append(queue[0]['form'])
        else:
            x.append('nil')
            x.append('nil')

        x.append(transition.can_reduce(stack, graph))
        x.append(transition.can_leftarc(stack, graph))
        X = (dict(zip(feature_names, x)))
        #remove reference, predict what action should be done(equiv to trans)
        #print('Stack is ', len(stack))
        #print('Queue is ', queue)
        trans_nr = classifier.predict(vec.transform(X))
        print(trans_nr[0])
        trans = dict_classes[trans_nr[0]]
        stack, queue, graph, trans = parse_ml(stack, queue, graph, trans)
        x = list()
    #stack, graph = transition.empty_stack(stack, graph)

    transition.empty_stack(stack, graph)
    for word in sentence:
        word['head'] = graph['heads'][word['id']]
        word['deprel'] = graph['deprels'][word['id']]
    return graph
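extract_features_sent above looks the predicted class number up in dict_classes to recover the transition string (e.g. 'la.det'). A minimal sketch of one way to build that mapping, assuming the training labels were encoded with scikit-learn's LabelEncoder; the original code may construct it differently:

from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
y_encoded = le.fit_transform(y)              # transition strings -> integers
dict_classes = dict(enumerate(le.classes_))  # integer class -> transition string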
Example #16
def extract_mode_1(stack, queue, graph, feature_names, sentence=None):
    features = list()
    if stack:
        stack0 = stack[0]
        features.extend([stack0.get('postag'), stack0.get('form')])
    else:
        features.extend(['nil', 'nil'])
    if queue:
        queue0 = queue[0]
        features.extend([queue0.get('postag'), queue0.get('form')])
    else:
        features.extend(['nil', 'nil'])

    features.append(transition.can_reduce(stack, graph))
    features.append(transition.can_leftarc(stack, graph))
    return dict(zip(feature_names.get('mode1'), features))
Example #17
def extract(stack, queue, graph, feature_names, sentence):

    features = []

    features.append(stack[0]['postag'] if len(stack) > 0 else 'nil')
    features.append(stack[1]['postag'] if len(stack) > 1 else 'nil')
    features.append(stack[0]['form'] if len(stack) > 0 else 'nil')
    features.append(stack[1]['form'] if len(stack) > 1 else 'nil')
    features.append(queue[0]['postag'] if len(queue) > 0 else 'nil')
    features.append(queue[1]['postag'] if len(queue) > 1 else 'nil')
    features.append(queue[0]['form'] if len(queue) > 0 else 'nil')
    features.append(queue[1]['form'] if len(queue) > 1 else 'nil')
    features.append(transition.can_reduce(stack, graph))
    features.append(transition.can_leftarc(stack, graph))

    return dict(zip(feature_names, features))
Example #18
def extract(stack, queue, state, feature_names, sentence):
    features = {}
    features["can_reduce"] = str(transition.can_reduce(stack, state))
    features["can_leftarc"] = str(transition.can_leftarc(stack, state))

    if stack:
        features["stack0_postag"] = stack[0]["postag"]
        features["stack0_form"] = stack[0]["form"]
    else:
        features["stack0_postag"] = "nil"
        features["stack0_form"] = "nil"
    if len(stack) > 1:
        features["stack1_postag"] = stack[1]["postag"]
        features["stack1_form"] = stack[1]["form"]
    else:
        features["stack1_postag"] = "nil"
        features["stack1_form"] = "nil"

    if queue:
        features["queue0_postag"] = queue[0]["postag"]
        features["queue0_form"] = queue[0]["form"]
    else:
        features["queue0_postag"] = "nil"
        features["queue0_form"] = "nil"
    if len(queue) > 1:
        features["queue1_postag"] = queue[1]["postag"]
        features["queue1_form"] = queue[1]["form"]
    else:
        features["queue1_postag"] = "nil"
        features["queue1_form"] = "nil"

    features["nextWord_form"] = "nil"
    features["nextWord_postag"] = "nil"
    if int(queue[0]["id"]) < len(sentence) - 1:
        w = sentence[int(queue[0]["id"]) + 1]
        features["nextWord_form"] = w['form']
        features["nextWord_postag"] = w['postag']

    features["prevWord_form"] = "nil"
    features["prevWord_postag"] = "nil"
    if int(queue[0]["id"]) > 0:
        w = sentence[int(queue[0]["id"]) - 1]
        features["prevWord_form"] = w['form']
        features["prevWord_postag"] = w['postag']

    return features
Example #19
def parse_ml(stack, queue, graph, trans):
    #print(trans)
    if stack and transition.can_rightarc(stack) and trans[:2] == 'ra':
        stack, queue, graph = transition.right_arc(stack, queue, graph,
                                                   trans[3:])
        return stack, queue, graph, 'ra'
    elif stack and transition.can_leftarc(
            stack, graph) and trans[:2] == 'la':  # why [:2]: the first two chars give the transition type
        stack, queue, graph = transition.left_arc(stack, queue, graph,
                                                  trans[3:])
        return stack, queue, graph, 'la'
    elif stack and transition.can_reduce(stack, graph) and trans[:2] == 're':
        stack, queue, graph = transition.reduce(stack, queue, graph)
        return stack, queue, graph, 're'
    else:
        stack, queue, graph = transition.shift(stack, queue, graph)
        return stack, queue, graph, "sh"
Example #20
def extract(stack, queue, graph, feature_names, sentence):
    features = []

    # Feature set 1
    features.extend(['nil', 'nil', 'nil', 'nil', 'nil', 'nil'])
    if len(stack) >= 1:
        features[0] = stack[0]['form']
        features[1] = stack[0]['postag']
    if len(queue) >= 1:
        features[2] = queue[0]['form']
        features[3] = queue[0]['postag']
    features[4] = transition.can_leftarc(stack, graph)
    features[5] = transition.can_reduce(stack, graph)

    # Feature set 2
    if len(feature_names) == 10 or len(feature_names) == 14:
        features.extend(['nil', 'nil', 'nil', 'nil'])
        if len(stack) >= 2:
            features[6] = stack[1]['form']
            features[7] = stack[1]['postag']
        if len(queue) >= 2:
            features[8] = queue[1]['form']
            features[9] = queue[1]['postag']

    # Feature set 3
    if len(feature_names) == 14:
        features.extend(['nil', 'nil', 'nil', 'nil'])
        if len(stack) >= 1 and len(sentence) > int(stack[0]['id']) + 1:
            word = sentence[int(stack[0]['id']) + 1]
            features[10] = word['form']
            features[11] = word['postag']
        if len(queue) >= 1 and len(sentence) > int(queue[0]['id']) + 1:
            word = sentence[int(queue[0]['id']) + 1]
            features[12] = word['form']
            features[13] = word['postag']

    features = dict(zip(feature_names, features))

    return features
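The extract above picks a feature template from the length of feature_names (6, 10, or 14). A sketch of the three lists it expects, with illustrative names laid out in the same order as the appends above:

# 6 features: top of stack, front of queue, and the two Boolean constraints
FEATURE_NAMES_6 = ['stack0_form', 'stack0_postag', 'queue0_form', 'queue0_postag',
                   'can_la', 'can_re']
# 10 features: adds the second stack and queue elements
FEATURE_NAMES_10 = FEATURE_NAMES_6 + ['stack1_form', 'stack1_postag',
                                      'queue1_form', 'queue1_postag']
# 14 features: adds the words following stack0 and queue0 in the sentence
FEATURE_NAMES_14 = FEATURE_NAMES_10 + ['stack0_next_form', 'stack0_next_postag',
                                       'queue0_next_form', 'queue0_next_postag']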
Example #21
def parse_ml(stack, queue, graph, trans):
    #right arc
    if stack and trans[:2] == 'ra' and transition.can_rightarc(stack):
        stack, queue, graph = transition.right_arc(stack, queue, graph,
                                                   trans[3:])
        return stack, queue, graph, 'ra'
    #left arc
    if stack and trans[:2] == 'la' and transition.can_leftarc(stack, graph):
        stack, queue, graph = transition.left_arc(stack, queue, graph,
                                                  trans[3:])
        return stack, queue, graph, 'la'
    #reduce
    if stack and trans[:2] == 're' and transition.can_reduce(stack, graph):
        stack, queue, graph = transition.reduce(stack, queue, graph)
        return stack, queue, graph, 're'
    #shift
    if stack and trans[:2] == 'sh':
        stack, queue, graph = transition.shift(stack, queue, graph)
        return stack, queue, graph, 'sh'
    #action not possible -> shift
    else:
        stack, queue, graph = transition.shift(stack, queue, graph)
        return stack, queue, graph, 'sh'
Example #22
def generate_feature_vector3(stack,queue,graph,sentence):
  feature_names = ['stack0_POS','stack1_POS','stack0_word','stack1_word',
  'queue0_POS','queue1_POS','queue0_word','queue1_word','can-re','can-la','following_word','following_word_POS','queue3_POS','stack0_previous_word_POS']
  try:
    stack0_POS = stack[0]['postag']
  except IndexError:
    stack0_POS = 'nil'
  try:
    stack1_POS = stack[1]['postag']
  except IndexError:
    stack1_POS = 'nil'
  try:
    stack0_word = stack[0]['form']
  except IndexError:
    stack0_word = 'nil'
  try:
    stack1_word = stack[1]['form']
  except IndexError:
    stack1_word = 'nil'
  #Guaranteed to exist because of while loop
  queue0_POS = queue[0]['postag'] 
  queue0_word= queue[0]['form']
  try:
    queue1_POS = queue[1]['postag']
  except IndexError:
      queue1_POS = 'nil'
  try:
    queue1_word= queue[1]['form']
  except IndexError:
    queue1_word = 'nil'

  #POS QUEUE 3
  try:
    queue3_POS = queue[3]['postag']
  except IndexError:
    queue3_POS = 'nil'

  #POS STACK 0 pw
  try:
    idx = int(stack[0]['id'])
    if idx > 0:
      stack0_previous_word_POS = sentence[idx - 1]['postag']
    else:
      stack0_previous_word_POS = 'nil'
  except IndexError:
    stack0_previous_word_POS = 'nil'
  #POS STACK 0 fw
  try:
    idx = stack[0]['id']
    following_word_POS = sentence[int(idx)+1]['postag']
  except IndexError:
    following_word_POS = 'nil'
  #LEX STACK 0 fw
  try:
    idx = stack[0]['id']
    following_word = sentence[int(idx)+1]['form']
  except IndexError:
    following_word = 'nil'

  
  can_left_arc = transition.can_leftarc(stack,graph)
  can_reduce = transition.can_reduce(stack,graph)



  return dict(zip(feature_names, [stack0_POS,stack1_POS,stack0_word,stack1_word,queue0_POS,queue1_POS,
  queue0_word,queue1_word,can_reduce,can_left_arc,following_word,following_word_POS,queue3_POS,stack0_previous_word_POS]))
Example #23
def extract(stack, queue, graph, feature_names, sentence):
    features = {}
    for fn in feature_names:
        if fn == 'stack0_POS':
            if stack:
                features["stack0_POS"] = stack[0]["postag"]
            else:
                features["stack0_POS"] = "nil"
        if fn == 'stack1_POS':
            if len(stack) > 1:
                features["stack1_POS"] = stack[1]["postag"]
            else:
                features["stack1_POS"] = "nil"
        if fn == 'stack0_word':

            if stack:
                features["stack0_word"] = stack[0]["form"]
            else:
                features["stack0_word"] = "nil"
        if fn == 'stack1_word':
            if stack and len(stack) > 1:
                features["stack1_word"] = stack[1]["form"]
            else:
                features["stack1_word"] = "nil"
        if fn == 'queue0_POS':
            if queue:
                features["queue0_POS"] = queue[0]["postag"]
            else:
                features["queue0_POS"] = "nil"
        if fn == 'queue1_POS':
            if queue and len(queue) > 1:
                features["queue1_POS"] = queue[1]["postag"]
            else:
                features["queue1_POS"] = "nil"
        if fn == 'queue0_word':
            if queue:
                features["queue0_word"] = queue[0]["form"]
            else:
                features["queue0_word"] = "nil"
        if fn == 'queue1_word':
            if queue and len(queue) > 1:
                features["queue1_word"] = queue[1]["form"]
            else:
                features["queue1_word"] = "nil"
        if fn == 'can-re':
            features["can-re"] = str(transition.can_reduce(stack, graph))

        if fn == 'can-la':
            features["can-la"] = str(transition.can_leftarc(stack, graph))

        if fn == 'next_word_POS':
            features["next_word_POS"] = "nil"
            if int(queue[0]["id"]) < len(sentence) - 1:
                # next word: id + 1
                w = sentence[int(queue[0]["id"]) + 1]
                features["next_word_POS"] = w['postag']
        if fn == 'next_word':
            features["next_word"] = "nil"
            if int(queue[0]["id"]) < len(sentence) - 1:
                # next word: id + 1
                w = sentence[int(queue[0]["id"]) + 1]
                features["next_word"] = w['form']

        if fn == 'prev_word_POS':
            features["prev_word_POS"] = "nil"
            if int(queue[0]["id"]) > 0:
                # previous word: id - 1
                w = sentence[int(queue[0]["id"]) - 1]
                features["prev_word_POS"] = w['postag']
        if fn == 'prev_word':
            features["prev_word"] = "nil"
            if int(queue[0]["id"]) > 0:
                # previous word: id - 1
                w = sentence[int(queue[0]["id"]) - 1]
                features["prev_word"] = w['form']

    return features
Example #24
def extract(stack, queue, graph, feature_names, sentence):
    """
    Returns a row
    """

    features = list()

    # TODO: Should we use postag or cpostag?
    POS_TAG = 'postag'
    WORD_TAG = 'form'
    ID_TAG = 'id'
    DEPREL_TAG = 'deprel'
    NULL_VALUE = 'nil'
    HEAD_TAG = 'head'

    # stack_0
    if stack:
        stack_0_POS = stack[0][POS_TAG]
        stack_0_word = stack[0][WORD_TAG]
    else:
        stack_0_POS = NULL_VALUE
        stack_0_word = NULL_VALUE

    # stack_1
    if len(stack) > 1:
        stack_1_POS = stack[1][POS_TAG]
        stack_1_word = stack[1][WORD_TAG]
    else:
        stack_1_POS = NULL_VALUE
        stack_1_word = NULL_VALUE

    # queue_0
    if queue:
        queue_0_POS = queue[0][POS_TAG]
        queue_0_word = queue[0][WORD_TAG]
    else:
        queue_0_POS = NULL_VALUE
        queue_0_word = NULL_VALUE

    # queue_1
    if len(queue) > 1:
        queue_1_POS = queue[1][POS_TAG]
        queue_1_word = queue[1][WORD_TAG]
    else:
        queue_1_POS = NULL_VALUE
        queue_1_word = NULL_VALUE

    if len(feature_names) == 6:
        features.append(stack_0_word)
        features.append(stack_0_POS)

        features.append(queue_0_word)
        features.append(queue_0_POS)

        features.append(transition.can_reduce(stack, graph))
        features.append(transition.can_leftarc(stack, graph))

    elif len(feature_names) == 10:
        features.append(stack_0_word)
        features.append(stack_0_POS)

        features.append(stack_1_word)
        features.append(stack_1_POS)

        features.append(queue_0_word)
        features.append(queue_0_POS)

        features.append(queue_1_word)
        features.append(queue_1_POS)

        features.append(transition.can_reduce(stack, graph))
        features.append(transition.can_leftarc(stack, graph))

    elif len(feature_names) == 13:
        # word after top of stack in sentence
        if stack_0_word == NULL_VALUE:
            after_stack_0_word = NULL_VALUE
            after_stack_0_POS = NULL_VALUE
        else:
            id_stack_0 = int(stack[0]['id'])
            if len(sentence) - 1 == id_stack_0:  #stack 0 is the last word
                after_stack_0_word = NULL_VALUE
                after_stack_0_POS = NULL_VALUE
            else:
                next_word = sentence[id_stack_0 + 1]
                after_stack_0_word = next_word[WORD_TAG]
                after_stack_0_POS = next_word[POS_TAG]

        # # Head of stack 0 POS
        # if stack:
        #     head_index_of_stack_0 = stack[0][HEAD_TAG]
        #     head_of_stack_0 = sentence[int(head_index_of_stack_0)]
        #     head_of_stack_0_POS = head_of_stack_0[POS_TAG]
        # else:
        #     head_of_stack_0_POS = NULL_VALUE

        features.append(stack_0_word)
        features.append(stack_0_POS)

        features.append(stack_1_word)
        features.append(stack_1_POS)

        features.append(queue_0_word)
        features.append(queue_0_POS)

        features.append(queue_1_word)
        features.append(queue_1_POS)

        features.append(after_stack_0_word)
        features.append(after_stack_0_POS)

        features.append(transition.can_reduce(stack, graph))
        features.append(transition.can_leftarc(stack, graph))

        # Our own features
        features.append(transition.can_rightarc(stack))
        # features.append(head_of_stack_0_POS)

    # Convert features object
    features = dict(zip(feature_names, features))

    return features
Example #25
def extract_features_sent(sentence, feature_names):
    """
    Extract the features from one sentence
    returns X and y, where X is a list of dictionaries and
    y is a list of symbols
    :param sentence:
    :param feature_names:
    :return:
    """
    #sentence  = sentence.splitlines()

    stack = []
    graph = {}
    queue = list(sentence)

    graph['heads'] = {}
    graph['heads']['0'] = '0'
    graph['deprels'] = {}
    graph['deprels']['0'] = 'ROOT'

    transitions = []

    x = list()
    X = list()
    y = list()

    while queue:

        if (len(stack) > 0):
            x.append(stack[0]['cpostag'])
        else:
            x.append('nil')
        if (len(stack) > 1):
            x.append(stack[1]['cpostag'])
        else:
            x.append('nil')
        if (len(stack) > 0):
            x.append(stack[0]['form'])
        else:
            x.append('nil')
        if (len(stack) > 1):
            x.append(stack[1]['form'])
        else:
            x.append('nil')
        if (queue):
            x.append(queue[0]['cpostag'])
        else:
            x.append('nil')
        if (len(queue) > 1):
            x.append(queue[1]['cpostag'])
        else:
            x.append('nil')
        if (queue):
            x.append(queue[0]['form'])
        else:
            x.append('nil')
        if (len(queue) > 1):
            x.append(queue[1]['form'])
        else:
            x.append('nil')

        x.append(transition.can_reduce(stack, graph))
        x.append(transition.can_leftarc(stack, graph))
        X.append(dict(zip(feature_names, x)))
        stack, queue, graph, trans = dparser.reference(stack, queue, graph)
        y.append(trans)
        x = list()
    # x.append(stack[0]['cpostag'])

    return X, y
Example #26
def extract_3(stack, queue, graph, feature_names, sentence):

    stack_list = ["nil", "nil", "nil", "nil"]
    if stack:
        stack_list[0] = stack[0]['form']
        stack_list[1] = stack[0]['postag']
        if len(stack) > 1:
            stack_list[2] = stack[1]['form']
            stack_list[3] = stack[1]['postag']
    queue_list = ["nil", "nil", "nil", "nil"]
    if queue:
        queue_list[0] = queue[0]['form']
        queue_list[1] = queue[0]['postag']
        if len(queue) > 1:
            queue_list[2] = queue[1]['form']
            queue_list[3] = queue[1]['postag']
    features = stack_list + queue_list

    previous_word = ["nil", "nil"]

    if stack:
        if int(stack[0]["id"]) > 0:
            word = sentence[int(stack[0]["id"]) - 1]
            previous_word[0] = word['form']
            previous_word[1] = word['postag']

    features = features + previous_word

    next_word = ["nil", "nil"]
    if stack:
        if int(stack[0]["id"]) < len(sentence) - 1:
            word = sentence[int(stack[0]["id"]) + 1]
            next_word[0] = word['form']
            next_word[1] = word['postag']

    features = features + next_word

    can_re = transition.can_reduce(stack, graph)
    can_left_arc = transition.can_leftarc(stack, graph)
    features.append(can_re)
    features.append(can_left_arc)
    features = zip(feature_names, features)
    features = dict(features)
    # print(features)
    return features


# extract_features(sentences, w_size, feature_names):
#     """
#     Builds X matrix and y vector
#     X is a list of dictionaries and y is a list
#     :param sentences:
#     :param w_size:
#     :return:
#     """

#     X_l = []
#     y_l = []
#     for sentence in sentences:
#         X, y = extract_features_sent(sentence, w_size, feature_names, False)
#         X_l.extend(X)
#         y_l.extend(y)
#     return X_l, y_l
#
#
# def extract_features_sent(sentence, w_size, feature_names, useChunk):
#     """
#     Extract the features from one sentence
#     returns X and y, where X is a list of dictionaries and
#     y is a list of symbols
#     :param sentence: string containing the CoNLL structure of a sentence
#     :param w_size:
#     :return:
#     """
#
#     # We pad the sentence to extract the context window more easily
#     start = "BOS BOS BOS\n"
#     end = "\nEOS EOS EOS"
#     start *= w_size
#     end *= w_size
#     sentence = start + sentence
#     sentence += end
#
#     # Each sentence is a list of rows
#     sentence = sentence.splitlines()
#     padded_sentence = list()
#     for line in sentence:
#         line = line.split()
#         padded_sentence.append(line)
#     # print(padded_sentence)
#
#     # We extract the features and the classes
#     # X contains is a list of features, where each feature vector is a dictionary
#     # y is the list of classes
#     X = list()
#     y = list()
#     for i in range(len(padded_sentence) - 2 * w_size):
#         # x is a row of X
#         x = list()
#         # The words in lower case
#         for j in range(2 * w_size + 1):
#             x.append(padded_sentence[i + j][0].lower())
#         # The POS
#         for j in range(2 * w_size + 1):
#             x.append(padded_sentence[i + j][1])
#         for j in range(w_size):
#             x.append(padded_sentence[i + j][2])
#             # for j in range(2):
#             #     x.append(padded_sentence[i - j + 1][2])
#         # The chunks (Up to the word)
#         """
#         for j in range(w_size):
#             feature_line.append(padded_sentence[i + j][2])
#         """
#         # We represent the feature vector as a dictionary
#         X.append(dict(zip(feature_names, x)))
#         # The classes are stored in a list
#         y.append(padded_sentence[i + w_size][2])
#     return X, y
Example #27
def extract_features_sent(sentence, feature_names, classifier, dict_classes,
                          vec):

    stack = []
    graph = {}
    queue = list(sentence)
    graph['heads'] = {}
    graph['heads']['0'] = '0'
    graph['deprels'] = {}
    graph['deprels']['0'] = 'ROOT'

    x = list()
    X = list()
    d = len(sentence)
    while queue:
        if (len(stack) > 0):
            x.append(stack[0]['cpostag'])
        else:
            x.append('nil')
        if (len(stack) > 1):
            x.append(stack[1]['cpostag'])
        else:
            x.append('nil')
        if (len(stack) > 0):
            x.append(stack[0]['form'])
        else:
            x.append('nil')
        if (len(stack) > 1):
            x.append(stack[1]['form'])
        else:
            x.append('nil')
        if (queue):
            x.append(queue[0]['cpostag'])
        else:
            x.append('nil')
        if (len(queue) > 1):
            x.append(queue[1]['cpostag'])
        else:
            x.append('nil')
        if (queue):
            x.append(queue[0]['form'])
        else:
            x.append('nil')
        if (len(queue) > 1):
            x.append(queue[1]['form'])
        else:
            x.append('nil')

        x.append(transition.can_reduce(stack, graph))
        x.append(transition.can_leftarc(stack, graph))
        X = (dict(zip(feature_names, x)))
        trans_nr = classifier.predict(vec.transform(X))[0]
        trans = dict_classes[trans_nr]
        stack, queue, graph, trans = parse_ml(stack, queue, graph, trans)
        x = list()

    transition.empty_stack(stack, graph)
    for word in sentence:
        word['head'] = graph['heads'][word['id']]
        word['deprel'] = graph['deprels'][word['id']]
    return X
Example #28
File: features.py Project: Ga22be/EDAN20
def extract(stack, queue, graph, feature_names, sentence):
    features = list()

    POS_TAG = "postag"
    WORD_TAG = "form"
    ID_TAG = "id"
    DEPREL_TAG = "deprel"
    NULL_VALUE = "nil"
    HEAD_TAG = "head"

    if stack:
        stack_0_pos = stack[0][POS_TAG]
        stack_0_word = stack[0][WORD_TAG]
    else:
        stack_0_pos = NULL_VALUE
        stack_0_word = NULL_VALUE

    if len(stack) > 1:
        stack_1_pos = stack[1][POS_TAG]
        stack_1_word = stack[1][WORD_TAG]
    else:
        stack_1_pos = NULL_VALUE
        stack_1_word = NULL_VALUE

    if queue:
        queue_0_pos = queue[0][POS_TAG]
        queue_0_word = queue[0][WORD_TAG]
    else:
        queue_0_pos = NULL_VALUE
        queue_0_word = NULL_VALUE

    if len(queue) > 1:
        queue_1_pos = queue[1][POS_TAG]
        queue_1_word = queue[1][WORD_TAG]
    else:
        queue_1_pos = NULL_VALUE
        queue_1_word = NULL_VALUE

    if len(feature_names) == 6:
        features.append(stack_0_word)
        features.append(stack_0_pos)

        features.append(queue_0_word)
        features.append(queue_0_pos)

        features.append(transition.can_reduce(stack, graph))
        features.append(transition.can_leftarc(stack, graph))

    elif len(feature_names) == 10:
        features.append(stack_0_word)
        features.append(stack_0_pos)

        features.append(stack_1_word)
        features.append(stack_1_pos)

        features.append(queue_0_word)
        features.append(queue_0_pos)

        features.append(queue_1_word)
        features.append(queue_1_pos)

        features.append(transition.can_reduce(stack, graph))
        features.append(transition.can_leftarc(stack, graph))

    features = dict(zip(feature_names, features))

    return features
Example #29
def extract(stack, queue, graph, feature_names, sentence):
    full_sentence = sentence
    features = []
    # print(sentence)
    try:
        #feature_names = [stack_0_postag, stack_0_form,stack_1_postag,stack_1_form,queue_0_postag,queue_0_form_queue_1_postag,queue_1_form_forward_word_postag,forward_word_form_backward_word_postag_backward_word_form]
        features.append(stack[0]['postag'])
        features.append(stack[0]['form'])
        try:
            features.append(stack[1]['postag'])
            features.append(stack[1]['form'])

        except IndexError:
            features.append("nil")
            features.append("nil")
    except IndexError:
        features.append("nil")
        features.append("nil")
        features.append("nil")
        features.append("nil")
    features.append(queue[0]['postag'])
    features.append(queue[0]['form'])
    try:
        features.append(queue[1]['postag'])
        features.append(queue[1]['form'])
    except IndexError:
        features.append("nil")
        features.append("nil")
    #print(sentence['id'])
    try:
        id = stack[0]['id']
        for ids in range(len(sentence)):
            if sentence[ids]['id'] == id:
                features.append(sentence[ids + 1]['postag'])
                features.append(sentence[ids + 1]['form'])
    except:
        features.append('nil')
        features.append('nil')

    # word preceding the top of the stack in the sentence
    if stack and int(stack[0]['id']) > 0:
        prev = sentence[int(stack[0]['id']) - 1]
        features.append(prev['postag'])
        features.append(prev['form'])
    else:
        features.append('nil')
        features.append('nil')

    if transition.can_leftarc(stack, graph):
        can_la = True
    else:
        can_la = False
    if transition.can_reduce(stack, graph):
        can_re = True
    else:
        can_re = False
    features.append(can_re)
    features.append(can_la)

    return features
Example #30
def extract(stack, queue, graph, feature_names, sentence):
    features = list()

    POS_TAG = "postag"
    WORD_TAG = "form"
    ID_TAG = "id"
    DEPREL_TAG = "deprel"
    NULL_VALUE = "nil"
    HEAD_TAG = "head"
    PHEAD_TAG = "phead"
    PDEPREL_TAG = "pdeprel"

    if stack:
        stack_0_pos = stack[0][POS_TAG]
        stack_0_word = stack[0][WORD_TAG]
    else:
        stack_0_pos = NULL_VALUE
        stack_0_word = NULL_VALUE

    if len(stack) > 1:
        stack_1_pos = stack[1][POS_TAG]
        stack_1_word = stack[1][WORD_TAG]
    else:
        stack_1_pos = NULL_VALUE
        stack_1_word = NULL_VALUE

    if queue:
        queue_0_pos = queue[0][POS_TAG]
        queue_0_word = queue[0][WORD_TAG]
    else:
        queue_0_pos = NULL_VALUE
        queue_0_word = NULL_VALUE

    if len(queue) > 1:
        queue_1_pos = queue[1][POS_TAG]
        queue_1_word = queue[1][WORD_TAG]
    else:
        queue_1_pos = NULL_VALUE
        queue_1_word = NULL_VALUE

    if len(feature_names) == 6:
        features.append(stack_0_word)
        features.append(stack_0_pos)

        features.append(queue_0_word)
        features.append(queue_0_pos)

        features.append(transition.can_reduce(stack, graph))
        features.append(transition.can_leftarc(stack, graph))

    elif len(feature_names) == 10:
        features.append(stack_0_word)
        features.append(stack_0_pos)

        features.append(stack_1_word)
        features.append(stack_1_pos)

        features.append(queue_0_word)
        features.append(queue_0_pos)

        features.append(queue_1_word)
        features.append(queue_1_pos)

        features.append(transition.can_reduce(stack, graph))
        features.append(transition.can_leftarc(stack, graph))

    elif len(feature_names) == 14:
        # No word
        if stack_0_word == NULL_VALUE:
            after_stack_0_word = NULL_VALUE
            after_stack_0_pos = NULL_VALUE

            before_stack_0_word = NULL_VALUE
            before_stack_0_pos = NULL_VALUE

        else:
            id_stack_0 = int(stack[0]['id'])
            # Last word
            if id_stack_0 == len(sentence) - 1:
                after_stack_0_word = NULL_VALUE
                after_stack_0_pos = NULL_VALUE
            else:
                next_word = sentence[id_stack_0 + 1]
                after_stack_0_word = next_word[WORD_TAG]
                after_stack_0_pos = next_word[POS_TAG]
            # First word
            if id_stack_0 == 0:
                before_stack_0_word = NULL_VALUE
                before_stack_0_pos = NULL_VALUE
            else:
                previous_word = sentence[id_stack_0 - 1]
                before_stack_0_word = previous_word[WORD_TAG]
                before_stack_0_pos = previous_word[POS_TAG]

        features.append(stack_0_word)
        features.append(stack_0_pos)

        features.append(stack_1_word)
        features.append(stack_1_pos)

        features.append(queue_0_word)
        features.append(queue_0_pos)

        features.append(queue_1_word)
        features.append(queue_1_pos)

        features.append(after_stack_0_word)
        features.append(after_stack_0_pos)

        features.append(before_stack_0_word)
        features.append(before_stack_0_pos)

        features.append(transition.can_reduce(stack, graph))
        features.append(transition.can_leftarc(stack, graph))

    features = dict(zip(feature_names, features))

    return features
Example #31
def extract2(stack, queue, state, feature_names, sentence):
    """
     Extract the features from one sentence
     returns X and y, where X is a list of dictionaries and
     y is a list of symbols
     :param sentence:
     :param w_size:
     :return:
     """
    tmpsiblings = ['nil', 'nil']
    left = 0
    right = 1000
    if (len(stack) > 0 and len(state) > 0):
        #print(state['heads'])
        for key, value in (state['heads'].items()):
            if (int(key) < int(stack[0]['id'])
                    and int(value) == int(stack[0]['head'])):
                #print(str(key)+' ' + str(value))
                #print(stack[0]['id'])
                if left < int(key):
                    left = int(key)
            elif int(key) > int(stack[0]['id']) and int(value) == int(
                    stack[0]['head']):
                if right > int(key):
                    right = int(key)
        if right < 1000:
            #print('right')
            #print(stack[0]['form'])
            #print(sentence[right]['form'])
            tmpsiblings[1] = sentence[right]['form']
            pass
        if (left > 0):
            #print('left')
            #print(stack[0]['form'])
            #print(sentence[left]['form'])
            tmpsiblings[0] = sentence[left]['form']
            pass
            #if(indx < stack[0]['id'] and head):
        #print(sentence[stack[0]['id']+1])
        #while(head > 0):
        #   print(stack[0]['head'])
    x = []
    tmpListStack = ['nil', 'nil', 'nil', 'nil']
    tmpListQueue = ['nil', 'nil', 'nil', 'nil']
    tmpNextWord = ['nil', 'nil']

    try:
        tmpNextWord[0] = sentence[int(stack[0]['id']) + 1]['postag']
        tmpNextWord[1] = sentence[int(stack[0]['id']) + 1]['form']
    except IndexError:
        pass
    try:
        tmpListStack[0] = stack[0]['form']
        tmpListStack[1] = stack[0]['postag']
        tmpListStack[2] = stack[1]['form']
        tmpListStack[3] = stack[1]['postag']
    except IndexError:
        pass
    try:
        tmpListQueue[0] = queue[0]['form']
        tmpListQueue[1] = queue[0]['postag']
        tmpListQueue[2] = queue[1]['form']
        tmpListQueue[3] = queue[1]['postag']
    except IndexError:
        pass
    x.extend(tmpListStack)
    x.extend(tmpListQueue)
    x.append(transition.can_rightarc(stack))
    x.append(transition.can_reduce(stack, state))
    x.extend(tmpNextWord)
    x.extend(tmpsiblings)
    # We represent the feature vector as a dictionary
    # The classes are stored in a list
    #y.append(padded_sentence[i + w_size][2])
    return dict(zip(feature_names, x))