Пример #1
0
 def __init__(self, cfg):
     """Store the configuration and create empty working state.

     Args:
         cfg: configuration object; it is kept on the instance and also
             handed to the DepTo4lang and DependencyProcessor constructors.
     """
     self.cfg = cfg

     # Collaborators built from the same configuration.
     self.dfl = DepTo4lang(cfg)
     self.dep_processor = DependencyProcessor(cfg)

     # Vocabulary mapping with its word list, plus the "binary" counterparts;
     # all start out empty.
     self.vocabulary, self.words = {}, []
     self.binary_vocab, self.binary_words = {}, []

     # A tuple of three independent empty lists.
     # NOTE(review): presumably parallel co-occurrence lists -- confirm
     # against the code that fills them.
     self.coocc = ([], [], [])

     # Not computed yet; both stay None until assigned elsewhere.
     self.zero_array = self.binary_array = None
Пример #2
0
 def __init__(self, cfg):
     """Read settings, ensure the output directories and build the helpers.

     Args:
         cfg: configuration object exposing ``get(section, option)``. The
             options read here are ``deps/lang``, ``text/deps_dir`` and
             ``text/graph_dir``.

     Note:
         ``parser_wrapper`` is only set when the configured language is
         ``'en'`` or ``'hu'``; for any other value the attribute is left
         undefined, exactly as before.
     """
     self.cfg = cfg
     self.lang = self.cfg.get("deps", "lang")
     self.deps_dir = self.cfg.get('text', 'deps_dir')
     # self.machines_dir = self.cfg.get('text', 'machines_dir')
     self.graphs_dir = cfg.get('text', 'graph_dir')
     # Explicit loop instead of map(): on Python 3 map() is lazy, so a
     # discarded map(ensure_dir, ...) would never call ensure_dir at all.
     for directory in (self.deps_dir, self.graphs_dir):  # self.machines_dir
         ensure_dir(directory)
     if self.lang == 'en':
         self.parser_wrapper = CoreNLPWrapper(self.cfg)
     elif self.lang == 'hu':
         self.parser_wrapper = Magyarlanc(self.cfg)
     self.dep_to_4lang = DepTo4lang(self.cfg)
Пример #3
0
 def __init__(self, cfg, direct_parse=False):
     """Read settings, ensure the needed directories and build the helpers.

     Args:
         cfg: configuration object exposing ``get(section, option)``.
         direct_parse: when true, the ``text/deps_dir`` option is neither
             read nor ensured, and ``deps_dir`` is not set on the instance.
             The flag is also forwarded to DepTo4lang.

     Note:
         ``parser_wrapper`` is only set for the languages ``'en'`` and
         ``'hu'``; any other value leaves the attribute undefined.
     """
     self.cfg = cfg
     language = cfg.get("deps", "lang")
     self.lang = language

     if not direct_parse:
         deps_dir = cfg.get('text', 'deps_dir')
         self.deps_dir = deps_dir
         ensure_dir(deps_dir)

     # self.machines_dir = self.cfg.get('text', 'machines_dir')
     graphs_dir = cfg.get('text', 'graph_dir')
     self.graphs_dir = graphs_dir
     ensure_dir(graphs_dir)

     # Pick the parser wrapper class matching the configured language.
     wrapper_cls = None
     if language == 'en':
         wrapper_cls = CoreNLPWrapper
     elif language == 'hu':
         wrapper_cls = Magyarlanc
     if wrapper_cls is not None:
         self.parser_wrapper = wrapper_cls(cfg)

     self.dep_to_4lang = DepTo4lang(cfg, direct_parse)
Пример #4
0
def main():
    """Run the dictionary and dependency pipelines from the command line.

    Command-line arguments (both optional):
        1. path of the configuration file, passed to ``get_cfg``
           (``None`` when absent);
        2. number of threads for ``DictTo4lang.run`` (defaults to 1).
    """
    log_format = (
        "%(asctime)s : "
        "%(module)s (%(lineno)s) - %(levelname)s - %(message)s")
    logging.basicConfig(level=logging.INFO, format=log_format)

    cli_args = sys.argv[1:]
    cfg_file = cli_args[0] if cli_args else None
    no_threads = int(cli_args[1]) if len(cli_args) > 1 else 1
    cfg = get_cfg(cfg_file)

    # Stage 1: run the dictionary step and print its result.
    dict_builder = DictTo4lang(cfg)
    dict_builder.run(no_threads)
    dict_builder.print_dict()

    # Stage 2: run the dependency step, then save machines and print graphs.
    dep_converter = DepTo4lang(cfg)
    dep_converter.dep_to_4lang()
    dep_converter.save_machines()
    dep_converter.print_graphs()
Пример #5
0
    def process(self, sens, print_deps=False):
        """Parse sentences and turn the parses into machines.

        Args:
            sens: the sentences handed to ``CoreNLPWrapper.parse_sentences``.
            print_deps: when true, the parsed sentences are passed to
                ``self.print_deps`` before conversion.

        Returns:
            Whatever ``DepTo4lang.get_machines_from_deps_and_corefs`` returns
            for the parsed sentences and their coreferences.

        Note:
            The root logger's level is switched to ``__MACHINE_LOGLEVEL__``
            and is not restored afterwards.
        """
        logging.info("running parser...")
        parser = CoreNLPWrapper(self.cfg)
        parsed_sens, corefs = parser.parse_sentences(sens)
        parsed_msg = "parsed {0} sentences".format(len(parsed_sens))
        logging.info(parsed_msg)
        if print_deps:
            self.print_deps(parsed_sens)

        logging.info("loading dep_to_4lang...")
        root_logger = logging.getLogger()
        root_logger.setLevel(__MACHINE_LOGLEVEL__)
        converter = DepTo4lang(self.cfg)

        logging.info("processing sentences...")
        words_to_machines = converter.get_machines_from_deps_and_corefs(
            parsed_sens, corefs)

        done_msg = "done, processed {0} sentences".format(len(parsed_sens))
        logging.info(done_msg)

        return words_to_machines