def main():
    """Print the de-duplicated noun heads found in a pickled set of passages.

    The first command-line argument is the path of a pickle file holding an
    iterable of passages.  For every passage the possible scenes are
    extracted, a head is extracted for each scene (scenes without a head are
    skipped), and whatever ``scenes.filter_noun_heads`` returns for those
    heads is accumulated into one set.  The accumulated items are printed one
    per line, in sorted order.

    Exits with a usage message (via ``sys.exit``) when no path is given.
    """
    if len(sys.argv) < 2:
        sys.exit('usage: expected the path of a pickled passages file '
                 'as the first argument')
    dbfile = sys.argv[1]

    # NOTE(review): unpickling can execute arbitrary code; only run this on
    # pickle files that come from a trusted source.
    with open(dbfile, 'rb') as f:
        passages = pickle.load(f)

    nouns = set()
    for passage in passages:
        possible = scenes.extract_possible_scenes(passage)
        heads = [scenes.extract_head(scene) for scene in possible]
        heads = [head for head in heads if head is not None]
        nouns.update(scenes.filter_noun_heads(heads))

    # Sort so the output does not depend on set iteration order, which
    # varies between runs when string hashing is randomised.
    print('\n'.join(sorted(nouns)))
def run_file(path, eng, stats):
    """Sort the possible scenes of one site XML file into ``stats``.

    The XML file at ``path`` is parsed, converted to a passage with
    ``convert.from_site``, and its possible scenes and their heads are
    extracted.  For each scene a ``Result`` is appended to one of the lists
    on ``stats``:

    * ``stats.heads``   -- no head was extracted, or
      ``eng.get_categories`` returned ``'implicit'``
    * ``stats.lemmas``  -- ``eng.get_categories`` returned ``'no base form'``
    * ``stats.fulls``   -- the third item of the returned value is truthy
    * ``stats.no_cats`` -- the third item of the returned value is falsy

    Nothing is printed or returned; ``stats`` is modified in place.

    Args:
        path: path of the site XML file to process.
        eng: object providing ``get_categories(scene, head)``.
        stats: object with ``heads``, ``lemmas``, ``fulls`` and ``no_cats``
            lists to append to.
    """
    # Binary mode lets the XML parser decode the bytes itself, honouring the
    # encoding declared in the document rather than the locale's default
    # text encoding.
    with open(path, 'rb') as f:
        root = ETree.ElementTree().parse(f)
    passage = convert.from_site(root)
    sc = scenes.extract_possible_scenes(passage)
    # All heads are extracted up front, before any category lookup.
    heads = [scenes.extract_head(x) for x in sc]

    for scene, head in zip(sc, heads):
        if head is None:
            stats.heads.append(Result(scene))
            continue
        out = eng.get_categories(scene, head)
        if out == 'implicit':
            stats.heads.append(Result(scene))
        elif out == 'no base form':
            stats.lemmas.append(Result(scene, head))
        elif out[2]:
            stats.fulls.append(Result(scene, head, *out))
        else:
            stats.no_cats.append(Result(scene, head, *out))
def test_extract_head(self):
    """Smoke test: head extraction runs on every possible scene.

    Only checks that the calls complete without raising; the extracted
    heads themselves are not inspected.
    """
    candidates = scenes.extract_possible_scenes(
        Layer1Tests._create_passage())
    for candidate in candidates:
        scenes.extract_head(candidate)
def test_possible_scenes(self):
    """Smoke test: scene extraction runs on a passage built from site XML.

    Only checks that the calls complete without raising; the extracted
    scenes themselves are not inspected.
    """
    site_root = ConversionTests._load_xml('./site3.xml')
    scenes.extract_possible_scenes(convert.from_site(site_root))