Пример #1
0
def get_noun_scene_heads(passage):
    """Return the noun heads of the scene nodes found in layer 1 of a passage.

    A node is considered when its tag is ``layer1.NodeTags.Foundational`` and
    its ``is_scene()`` method returns a true value. For each such node
    ``scenes.extract_head`` is called; heads that are ``None`` are dropped,
    and the rest are kept only if ``scenes.is_noun`` accepts them.

    Args:
        passage: object exposing ``layer(layer_id).all`` as an iterable
            of nodes.

    Returns:
        A list of the heads that passed both filters, in node order.
    """
    # Phase 1: pick the foundational nodes that are scenes.
    scene_nodes = []
    for node in passage.layer(layer1.LAYER_ID).all:
        if node.tag == layer1.NodeTags.Foundational and node.is_scene():
            scene_nodes.append(node)

    # Phase 2: extract every head up front, before any noun check runs.
    candidate_heads = []
    for node in scene_nodes:
        candidate_heads.append(scenes.extract_head(node))

    # Phase 3: discard missing heads and anything that is not a noun.
    return [
        head for head in candidate_heads
        if head is not None and scenes.is_noun(head)
    ]
Пример #2
0
def main():
    """Print the distinct noun heads of all passages stored in a pickle file.

    The path of the pickle file is taken from the first command-line
    argument. For every unpickled passage the possible scenes are extracted,
    their non-``None`` heads are passed to ``scenes.filter_noun_heads``, and
    the results are accumulated in a set. The set is printed one item per
    line; since it is a set, the output order is not defined.
    """
    db_path = sys.argv[1]
    # NOTE: pickle.load can execute arbitrary code; only use trusted files.
    with open(db_path, 'rb') as db:
        passages = pickle.load(db)

    unique_nouns = set()
    for passage in passages:
        found_heads = []
        for scene in scenes.extract_possible_scenes(passage):
            head = scenes.extract_head(scene)
            if head is not None:
                found_heads.append(head)
        unique_nouns.update(scenes.filter_noun_heads(found_heads))

    print('\n'.join(unique_nouns))
Пример #3
0
def run_file(path, eng, stats):
    """Classify the possible scenes of a site XML file into ``stats``.

    The file at ``path`` is parsed, converted to a passage with
    ``convert.from_site``, and each possible scene is paired with its
    extracted head. One ``Result`` per scene is appended to a list on
    ``stats``:

    * ``stats.heads``   -- no head, or ``eng.get_categories`` returned
      ``'implicit'``;
    * ``stats.lemmas``  -- ``eng.get_categories`` returned ``'no base form'``;
    * ``stats.fulls``   -- the third item of the returned value is truthy;
    * ``stats.no_cats`` -- everything else.

    Args:
        path: path of the site XML file to read.
        eng: object providing ``get_categories(scene, head)``.
        stats: object holding the four result lists named above.
    """
    with open(path) as xml_file:
        xml_root = ETree.ElementTree().parse(xml_file)
    passage = convert.from_site(xml_root)

    candidates = scenes.extract_possible_scenes(passage)
    # All heads are extracted before any scene is categorised.
    heads = [scenes.extract_head(scene) for scene in candidates]

    for scene, head in zip(candidates, heads):
        if head is None:
            stats.heads.append(Result(scene))
            continue

        categories = eng.get_categories(scene, head)
        if categories == 'implicit':
            stats.heads.append(Result(scene))
            continue
        if categories == 'no base form':
            stats.lemmas.append(Result(scene, head))
            continue

        # Remaining outcomes are indexable; item 2 selects the bucket.
        bucket = stats.fulls if categories[2] else stats.no_cats
        bucket.append(Result(scene, head, *categories))
Пример #4
0
def run_file(path, eng, stats):
    """Read a site XML file and record a Result for each possible scene.

    Nothing is returned; results are appended to lists on ``stats``:
    ``heads`` (scene has no head, or the engine answered ``'implicit'``),
    ``lemmas`` (engine answered ``'no base form'``), ``fulls`` (index 2 of
    the engine's answer is truthy) and ``no_cats`` (any other answer).

    Args:
        path: location of the site XML file.
        eng: engine object whose ``get_categories(scene, head)`` is queried.
        stats: accumulator exposing ``heads``, ``lemmas``, ``fulls`` and
            ``no_cats`` lists.
    """
    with open(path) as handle:
        site_root = ETree.ElementTree().parse(handle)
    passage = convert.from_site(site_root)

    scene_list = scenes.extract_possible_scenes(passage)
    head_list = []
    for scene in scene_list:
        head_list.append(scenes.extract_head(scene))

    for scene, head in zip(scene_list, head_list):
        if head is not None:
            answer = eng.get_categories(scene, head)
            if answer == 'implicit':
                stats.heads.append(Result(scene))
            elif answer == 'no base form':
                stats.lemmas.append(Result(scene, head))
            elif not answer[2]:
                stats.no_cats.append(Result(scene, head, *answer))
            else:
                stats.fulls.append(Result(scene, head, *answer))
        else:
            # A scene without a head is filed with the implicit ones.
            stats.heads.append(Result(scene))
Пример #5
0
def get_noun_scene_heads(passage):
    """Gather noun heads from the scene nodes of a passage.

    Walks every node of the passage's ``layer1.LAYER_ID`` layer, keeps those
    tagged ``layer1.NodeTags.Foundational`` for which ``is_scene()`` is true,
    extracts a head from each with ``scenes.extract_head``, and returns the
    heads that are not ``None`` and satisfy ``scenes.is_noun``.

    Args:
        passage: object whose ``layer(...)`` result exposes an ``all``
            iterable of nodes.

    Returns:
        list: the noun heads, in the order their nodes were visited.
    """
    layer_nodes = passage.layer(layer1.LAYER_ID).all
    scene_nodes = [
        node for node in layer_nodes
        if node.tag == layer1.NodeTags.Foundational and node.is_scene()
    ]

    # Heads are all extracted first, then filtered in a separate pass.
    extracted = [scenes.extract_head(node) for node in scene_nodes]

    result = []
    for head in extracted:
        if head is None:
            continue
        if scenes.is_noun(head):
            result.append(head)
    return result
Пример #6
0
 def test_extract_head(self):
     """Smoke test: extract_head must run on every possible scene.

     Only checks that the calls complete; the returned heads are not
     inspected.
     """
     candidates = scenes.extract_possible_scenes(
         Layer1Tests._create_passage())
     for candidate in candidates:
         scenes.extract_head(candidate)
Пример #7
0
def get_noun_heads(passage):
    """Return the noun heads of the nodes from ``extract_non_scenes_AGCE``.

    Each node returned by ``scenes.extract_non_scenes_AGCE(passage)`` is
    passed to ``scenes.extract_head``; heads that are ``None`` or rejected
    by ``scenes.is_noun`` are left out.

    Args:
        passage: the passage handed to ``scenes.extract_non_scenes_AGCE``.

    Returns:
        list: the remaining heads, in node order.
    """
    # Extract every head before running any noun check.
    candidate_heads = []
    for node in scenes.extract_non_scenes_AGCE(passage):
        candidate_heads.append(scenes.extract_head(node))

    selected = []
    for candidate in candidate_heads:
        if candidate is not None and scenes.is_noun(candidate):
            selected.append(candidate)
    return selected