def __init__(self, cfg):
    """Store the configuration and start with empty bookkeeping state.

    cfg is kept on the instance and also handed to the DepTo4lang and
    DependencyProcessor helpers constructed here.
    """
    self.cfg = cfg

    # Helper objects, both built from the same configuration.
    self.dfl = DepTo4lang(cfg)
    self.dep_processor = DependencyProcessor(cfg)

    # Vocabulary mapping and word list, plus their "binary" counterparts;
    # all of them start out empty.
    self.vocabulary, self.words = {}, []
    self.binary_vocab, self.binary_words = {}, []

    # A tuple holding three independent, initially empty lists.
    self.coocc = ([], [], [])

    # Not populated in the constructor; both start as None.
    self.zero_array = None
    self.binary_array = None
def __init__(self, cfg):
    """Read settings from cfg, prepare directories and build the helpers.

    cfg must provide a two-argument get(section, option); the options
    read are "deps"/"lang", "text"/"deps_dir" and "text"/"graph_dir".
    Both directory paths are passed to ensure_dir. A parser wrapper is
    chosen by language ('en' -> CoreNLPWrapper, 'hu' -> Magyarlanc) and
    a DepTo4lang instance is created from the same configuration.
    """
    self.cfg = cfg
    self.lang = self.cfg.get("deps", "lang")
    self.deps_dir = self.cfg.get('text', 'deps_dir')
    self.graphs_dir = self.cfg.get('text', 'graph_dir')

    # Call ensure_dir in an explicit loop rather than through map():
    # on Python 3 map() returns a lazy iterator, so a discarded
    # map(ensure_dir, ...) would never invoke ensure_dir at all.
    for directory in (self.deps_dir, self.graphs_dir):
        ensure_dir(directory)

    if self.lang == 'en':
        self.parser_wrapper = CoreNLPWrapper(self.cfg)
    elif self.lang == 'hu':
        self.parser_wrapper = Magyarlanc(self.cfg)
    # NOTE(review): for any other language value no parser_wrapper
    # attribute is set here -- confirm whether that is intended.

    self.dep_to_4lang = DepTo4lang(self.cfg)
def __init__(self, cfg, direct_parse=False):
    """Read settings from cfg and build the parser and DepTo4lang helpers.

    cfg must provide a two-argument get(section, option). Unless
    direct_parse is true, the "text"/"deps_dir" and "text"/"graph_dir"
    paths are read, stored on the instance and passed to ensure_dir.
    direct_parse is also forwarded to DepTo4lang.
    """
    self.cfg = cfg
    self.lang = cfg.get("deps", "lang")

    # NOTE(review): this assumes both directory set-ups belong to the
    # "not direct_parse" branch -- confirm against the original layout.
    if not direct_parse:
        deps_dir = cfg.get('text', 'deps_dir')
        self.deps_dir = deps_dir
        ensure_dir(deps_dir)

        graphs_dir = cfg.get('text', 'graph_dir')
        self.graphs_dir = graphs_dir
        ensure_dir(graphs_dir)

    # Language-specific parser front end; other language values leave
    # parser_wrapper unset.
    lang = self.lang
    if lang == 'en':
        self.parser_wrapper = CoreNLPWrapper(cfg)
    elif lang == 'hu':
        self.parser_wrapper = Magyarlanc(cfg)

    self.dep_to_4lang = DepTo4lang(cfg, direct_parse)
def main():
    """Run the DictTo4lang and DepTo4lang steps one after the other.

    Command line arguments (both optional):
      1. configuration file handed to get_cfg (None when absent)
      2. thread count passed to DictTo4lang.run (1 when absent)
    """
    log_format = (
        "%(asctime)s : "
        "%(module)s (%(lineno)s) - %(levelname)s - %(message)s")
    logging.basicConfig(level=logging.INFO, format=log_format)

    cli_args = sys.argv[1:]
    cfg_file = cli_args[0] if cli_args else None
    no_threads = int(cli_args[1]) if len(cli_args) > 1 else 1
    cfg = get_cfg(cfg_file)

    # Dictionary step: run it, then print the resulting dictionary.
    dict_converter = DictTo4lang(cfg)
    dict_converter.run(no_threads)
    dict_converter.print_dict()

    # Dependency step: convert, save the machines, print the graphs.
    dep_converter = DepTo4lang(cfg)
    dep_converter.dep_to_4lang()
    dep_converter.save_machines()
    dep_converter.print_graphs()
def process(self, sens, print_deps=False):
    """Parse sens with CoreNLPWrapper and convert the result via DepTo4lang.

    sens is handed unchanged to CoreNLPWrapper.parse_sentences. When
    print_deps is true the parsed sentences are also passed to
    self.print_deps. Returns the value produced by
    DepTo4lang.get_machines_from_deps_and_corefs.
    """
    logging.info("running parser...")
    parser = CoreNLPWrapper(self.cfg)
    parses, coref_data = parser.parse_sentences(sens)
    logging.info("parsed {0} sentences".format(len(parses)))

    if print_deps:
        self.print_deps(parses)

    logging.info("loading dep_to_4lang...")
    # The root logger's level is changed here and is not restored
    # afterwards, so it also applies to the log calls below.
    root_logger = logging.getLogger()
    root_logger.setLevel(__MACHINE_LOGLEVEL__)
    converter = DepTo4lang(self.cfg)

    logging.info("processing sentences...")
    machines = converter.get_machines_from_deps_and_corefs(
        parses, coref_data)
    logging.info("done, processed {0} sentences".format(len(parses)))
    return machines