def load(cls, path):
    """Load candidates from the resource file at ``path``.

    The loader is chosen from the file extension (compared
    case-insensitively):

    * ``.txt``    -- read through ``FileResource``, passing ``cls.__init__``
      as the ``deserializer`` argument.
    * ``.pickle`` -- read through ``PickleResource``.

    Args:
        path: Path of the file to load; only its extension is inspected here.

    Returns:
        Whatever the selected resource's ``load()`` returns, or an empty
        list when the extension is neither ``.txt`` nor ``.pickle``.
    """
    # Imports stay at function scope, as in the original block.
    import os

    from pola.machine.topic_model.resource import FileResource
    from pola.machine.topic_model.resource import PickleResource

    # os.path.splitext keeps the leading dot (".txt", ".pickle"), so the
    # comparisons below must include it. Comparing against "txt"/"pickle"
    # never matched and made this function always return [].
    ext = os.path.splitext(os.path.basename(path))[1].lower()

    if ext == ".txt":
        # NOTE(review): cls.__init__ is the plain initializer function, not
        # the class itself -- confirm this is what FileResource.load expects
        # as a deserializer.
        return FileResource(path).load(deserializer=cls.__init__)

    if ext == ".pickle":
        return PickleResource(path).load()

    # Unsupported extension: nothing to load.
    return []
# Load a pickled document, trim its vocabulary according to the command-line
# options in `args`, print the remaining vocabulary and optionally save the
# edited document next to the input file.
# NOTE(review): the original layout was lost; the nesting below assumes that
# cut_pos and show_vocab run unconditionally -- confirm against the source.
base_dir = os.path.dirname(path)
doc = PickleResource(path).load()

if args.freq > 0:
    doc.cut_frequent(args.freq)

# Part-of-speech filter; the tag values are passed through exactly as given.
doc.cut_pos({
    "pos": ["動詞", "副詞"],
    "class1": ["接尾", "副詞可能"],
    "class2": ["人名", "地域", "副詞可能"],
})

if args.under > 0:
    doc.cut_under(args.under)

if args.above > 0:
    doc.cut_above(args.above)

if args.ignore:
    # The ignore list is resolved relative to the input file's directory;
    # the first field of each loaded entry is removed from the vocabulary.
    ignore_path = os.path.join(base_dir, args.ignore)
    for entry in FileResource(ignore_path).load():
        doc.remove_vocab(entry[0])

doc.show_vocab(show_pos=True)

if args.save:
    # Write "<input stem>_edited.pickle" into the input file's directory.
    stem = os.path.splitext(os.path.basename(path))[0]
    edited_name = stem + "_edited.pickle"
    edited_path = os.path.join(base_dir, "./" + edited_name)
    PickleResource(edited_path).save(doc)