示例#1
0
def main():
    """Launch `Server.translate_interactive` for the "es-en.working" model.

    The Moses decoder path is read from the "Environment Settings" section
    of the config returned by `utilities.config_file_reader()` and passed
    through `utilities.safe_string` before the `Server` is built.
    """
    settings = utilities.config_file_reader()
    raw_decoder_path = settings.get(
        "Environment Settings", "path_to_moses_decoder")
    decoder_path = utilities.safe_string(raw_decoder_path)

    Server(decoder_path).translate_interactive("es-en.working")
示例#2
0
def main():
    """Preprocess the es-en and fr-en Europarl corpora with `Parser`.

    Steps, in order: tokenize the four raw files, cleanse each tokenized
    pair, split both cleansed pairs into train/tune/test (fractions .6 and
    .2), take a .5 subset of each training pair, and finally match the two
    test pairs against each other.
    """
    cfg = utilities.config_file_reader()
    env_section = "Environment Settings"
    iter_section = "Iteration Settings"

    moses_path = utilities.safe_string(
        cfg.get(env_section, "path_to_moses_decoder"))
    memory_limit = cfg.getint(env_section, "mem_limit")
    longest_sentence = cfg.getint(iter_section, "max_sentence_len")
    shortest_sentence = cfg.getint(iter_section, "min_sentence_len")
    parser = Parser(
        moses_path, memory_limit, longest_sentence, shortest_sentence, True)

    raw_corpora = (
        "data/src/europarl-v7.es-en.es",
        "data/src/europarl-v7.es-en.en",
        "data/src/europarl-v7.fr-en.en",
        "data/src/europarl-v7.fr-en.fr",
    )
    for raw_corpus in raw_corpora:
        parser.tokenize(raw_corpus)

    tokenized_pairs = (
        ("data/europarl-v7.es-en.es.tok", "data/europarl-v7.es-en.en.tok"),
        ("data/europarl-v7.fr-en.en.tok", "data/europarl-v7.fr-en.fr.tok"),
    )
    for first_tok, second_tok in tokenized_pairs:
        parser.cleanse(first_tok, second_tok)

    parser.split_train_tune_test(
        "data/europarl-v7.es-en.es.tok.cleansed",
        "data/europarl-v7.es-en.en.tok.cleansed",
        "data/europarl-v7.fr-en.en.tok.cleansed",
        "data/europarl-v7.fr-en.fr.tok.cleansed",
        .6,
        .2,
    )

    # Shrinking the training data is worthwhile when it is too large.
    training_pairs = (
        ("data/train/europarl-v7.es-en.es.tok.cleansed.train",
         "data/train/europarl-v7.es-en.en.tok.cleansed.train"),
        ("data/train/europarl-v7.fr-en.en.tok.cleansed.train",
         "data/train/europarl-v7.fr-en.fr.tok.cleansed.train"),
    )
    for first_train, second_train in training_pairs:
        parser.subset(first_train, second_train, .5, "train/")

    # The test data of both legs must be matched to stay consistent.
    parser.match(
        "data/test/europarl-v7.es-en.es.tok.cleansed.test",
        "data/test/europarl-v7.es-en.en.tok.cleansed.test",
        "data/test/europarl-v7.fr-en.en.tok.cleansed.test",
        "data/test/europarl-v7.fr-en.fr.tok.cleansed.test",
    )
示例#3
0
def main():
    """Run `Tune.tune` on the es-en leg and then on the en-fr leg.

    The CPU count and Moses decoder path come from the "Environment
    Settings" section of the config file.
    """
    settings = utilities.config_file_reader()
    cpu_count = settings.getint("Environment Settings", "ncpus")
    moses_path = utilities.safe_string(
        settings.get("Environment Settings", "path_to_moses_decoder"))

    tuner = Tune(moses_path, cpu_count)
    tuning_jobs = (
        ("data/tune/europarl-v7.es-en.es.tok.cleansed.tune",
         "data/tune/europarl-v7.es-en.en.tok.cleansed.tune",
         "es-en.working"),
        ("data/tune/europarl-v7.fr-en.en.tok.cleansed.tune",
         "data/tune/europarl-v7.fr-en.fr.tok.cleansed.tune",
         "en-fr.working"),
    )
    for first_file, second_file, working_name in tuning_jobs:
        tuner.tune(first_file, second_file, working_name)
def main():
    """Evaluate both translation legs and then the pivoted combination.

    Calls `Test.test_translation_quality` once per leg (es-en, en-fr) and
    `Test.test_pivoting_quality` on the ".matched" test files.
    """
    settings = utilities.config_file_reader()
    moses_path = utilities.safe_string(
        settings.get("Environment Settings", "path_to_moses_decoder"))
    evaluator = Test(moses_path)

    first_leg = "es-en.working"
    second_leg = "en-fr.working"

    evaluator.test_translation_quality(
        "data/test/europarl-v7.es-en.es.tok.cleansed.test",
        "data/test/europarl-v7.es-en.en.tok.cleansed.test",
        first_leg,
    )
    evaluator.test_translation_quality(
        "data/test/europarl-v7.fr-en.en.tok.cleansed.test",
        "data/test/europarl-v7.fr-en.fr.tok.cleansed.test",
        second_leg,
    )
    evaluator.test_pivoting_quality(
        "data/test/europarl-v7.es-en.es.tok.cleansed.test.matched",
        first_leg,
        "data/test/europarl-v7.fr-en.fr.tok.cleansed.test.matched",
        second_leg,
    )
示例#5
0
def smtp_pipeline(config):
    """Run the whole two-leg pipeline: parse, train, tune and test.

    Args:
        config: Configuration object exposing ``get``, ``getint`` and
            ``getfloat`` taking ``(section, option)``. The sections
            "Environment Settings" and "Iteration Settings" are read.

    The first leg is built from ``src_lang_data`` / ``src_piv_lang_data``
    and the second leg from ``piv_tar_lang_data`` / ``tar_lang_data``.
    """
    # NOTE(review): every other string option here goes through
    # utilities.safe_string, but the decoder path does not -- confirm
    # whether that is intentional.
    path_to_moses = config.get("Environment Settings", "path_to_moses_decoder")
    mem_limit = config.getint("Environment Settings", "mem_limit")
    max_len = config.getint("Iteration Settings", "max_sentence_len")
    min_len = config.getint("Iteration Settings", "min_sentence_len")

    srcf = utilities.safe_string(config.get("Iteration Settings", "src_lang_data"))
    piv1f = utilities.safe_string(config.get("Iteration Settings", "src_piv_lang_data"))
    piv2f = utilities.safe_string(config.get("Iteration Settings", "piv_tar_lang_data"))
    tarf = utilities.safe_string(config.get("Iteration Settings", "tar_lang_data"))
    train_split = config.getfloat("Iteration Settings", "train_split")
    test_split = config.getfloat("Iteration Settings", "test_split")
    ncpus = config.getint("Environment Settings", "ncpus")
    ngram = config.getint("Environment Settings", "ngram")
    work_dir1 = utilities.safe_string(config.get("Iteration Settings", "working_dir_first_leg"))
    work_dir2 = utilities.safe_string(config.get("Iteration Settings", "working_dir_second_leg"))

    pair1, pair2 = FileDataPair(srcf, piv1f), FileDataPair(piv2f, tarf)
    raw_files = pair1.get_raw_filenames() + pair2.get_raw_filenames()
    pair1_tokenized_src, pair1_tokenized_tar = pair1.get_tokenized_filenames()
    pair2_tokenized_src, pair2_tokenized_tar = pair2.get_tokenized_filenames()
    pair1_cleansed_src, pair1_cleansed_tar = pair1.get_cleansed_filenames()
    pair2_cleansed_src, pair2_cleansed_tar = pair2.get_cleansed_filenames()

    # --- Parsing -----------------------------------------------------------
    parser = Parser(path_to_moses, mem_limit, max_len, min_len, False)
    parser.tokenize_files(raw_files)
    parser.cleanse(pair1_tokenized_src, pair1_tokenized_tar)
    parser.cleanse(pair2_tokenized_src, pair2_tokenized_tar)
    parser.split_train_tune_test(pair1_cleansed_src, pair1_cleansed_tar,
                                 pair2_cleansed_src, pair2_cleansed_tar,
                                 train_split, test_split)

    # The test filenames must be bound *before* match() uses them; the
    # previous code referenced them here while only assigning them further
    # down, which raised UnboundLocalError. The argument order is
    # (pair1 src, pair1 tar, pair2 src, pair2 tar).
    pair1_test_src, pair1_test_tar = pair1.get_test_filenames()
    pair2_test_src, pair2_test_tar = pair2.get_test_filenames()
    parser.match(pair1_test_src, pair1_test_tar, pair2_test_src, pair2_test_tar)

    # --- Training ----------------------------------------------------------
    pair1_target_train_filename = pair1.get_target_train_filename()
    pair2_target_train_filename = pair2.get_target_train_filename()
    pair1_train_src, pair1_train_tar = pair1.get_train_filenames()
    pair2_train_src, pair2_train_tar = pair2.get_train_filenames()

    trainer = Train(path_to_moses, ncpus, ngram, False)
    trainer.build_language_models(pair1_target_train_filename)
    trainer.build_language_models(pair2_target_train_filename)
    trainer.train(pair1_train_src, pair1_train_tar, work_dir1)
    trainer.train(pair2_train_src, pair2_train_tar, work_dir2)

    # --- Tuning ------------------------------------------------------------
    pair1_tune_src, pair1_tune_tar = pair1.get_tune_filenames()
    pair2_tune_src, pair2_tune_tar = pair2.get_tune_filenames()

    tuner = Tune(path_to_moses, ncpus, False)
    tuner.tune(pair1_tune_src, pair1_tune_tar, work_dir1)
    tuner.tune(pair2_tune_src, pair2_tune_tar, work_dir2)

    # --- Testing -----------------------------------------------------------
    # Distinct names keep the eval files from clobbering the test filenames.
    pair1_eval = pair1.get_eval_filename()
    pair2_eval = pair2.get_eval_filename()

    tester = Test(path_to_moses, False)
    tester.test_pivoting_quality(pair1_eval, work_dir1,
                                 pair2_eval, work_dir2)
示例#6
0
def main():
    """Build language models and train both translation legs with `Train`.

    Language models are built from the es-en ".en" and fr-en ".fr" training
    files, after which `Train.train` is run for "es-en.working" and
    "en-fr.working".
    """
    settings = utilities.config_file_reader()
    moses_path = utilities.safe_string(
        settings.get("Environment Settings", "path_to_moses_decoder"))
    ngram_order = settings.getint("Environment Settings", "ngram")
    cpu_count = settings.getint("Environment Settings", "ncpus")

    es_en_es = "data/train/europarl-v7.es-en.es.tok.cleansed.train"
    es_en_en = "data/train/europarl-v7.es-en.en.tok.cleansed.train"
    fr_en_en = "data/train/europarl-v7.fr-en.en.tok.cleansed.train"
    fr_en_fr = "data/train/europarl-v7.fr-en.fr.tok.cleansed.train"

    trainer = Train(moses_path, cpu_count, ngram_order)
    for lm_corpus in (es_en_en, fr_en_fr):
        trainer.build_language_models(lm_corpus)

    trainer.train(es_en_es, es_en_en, "es-en.working")
    trainer.train(fr_en_en, fr_en_fr, "en-fr.working")
示例#7
0
def main():
    """Score the es-en and en-fr legs, then the pivoted translation.

    Each leg is passed to `Test.test_translation_quality`; the ".matched"
    test files of both legs are passed to `Test.test_pivoting_quality`.
    """
    cfg = utilities.config_file_reader()
    decoder_path = cfg.get("Environment Settings", "path_to_moses_decoder")
    test = Test(utilities.safe_string(decoder_path))

    per_leg_runs = [
        ("data/test/europarl-v7.es-en.es.tok.cleansed.test",
         "data/test/europarl-v7.es-en.en.tok.cleansed.test",
         "es-en.working"),
        ("data/test/europarl-v7.fr-en.en.tok.cleansed.test",
         "data/test/europarl-v7.fr-en.fr.tok.cleansed.test",
         "en-fr.working"),
    ]
    for first_file, second_file, working_name in per_leg_runs:
        test.test_translation_quality(first_file, second_file, working_name)

    test.test_pivoting_quality(
        "data/test/europarl-v7.es-en.es.tok.cleansed.test.matched",
        "es-en.working",
        "data/test/europarl-v7.fr-en.fr.tok.cleansed.test.matched",
        "en-fr.working")
def main():
    """Create a `Server` and call `translate_interactive("es-en.working")`.

    The decoder location is the ``path_to_moses_decoder`` option of the
    "Environment Settings" config section, cleaned by
    `utilities.safe_string`.
    """
    section, option = "Environment Settings", "path_to_moses_decoder"
    cfg = utilities.config_file_reader()
    moses_location = utilities.safe_string(cfg.get(section, option))

    translation_server = Server(moses_location)
    translation_server.translate_interactive("es-en.working")
示例#9
0
    def handler(self, conn, a):
        """Serve a single client connection until it disconnects or errors.

        Protocol, as implemented below:
          * The server first sends ``auth-request``.
          * The first message longer than one character is cleaned with
            ``safe_string`` and used as the player id; the server answers
            ``auth-success,<player_id>,<location>``.
          * Later messages are split on ``;`` and each part on ``,``.
            ``chat,<text>`` is appended to ``self.chat``;
            ``position,...`` updates the player's stored location and is
            acknowledged with ``update-success``.

        Args:
            conn: Connected socket for this client.
            a: Address pair; ``a[0]`` and ``a[1]`` are printed as host:port.

        The socket is always closed on exit. Packets that are not valid
        UTF-8 and messages with missing or non-numeric fields are skipped
        instead of killing the handler.
        """
        default_location = "0.0,0.0,0.0,0.0,180.0,0.0"

        print("* {}:{} connected...".format(a[0], a[1]))

        player_id = None

        try:
            # Send a message asking client to identify client UUID
            conn.sendall(str.encode('auth-request'))

            while True:
                try:
                    data = conn.recv(1024)

                    # An empty read means the peer closed the connection.
                    if not data:
                        print("* {}:{} disconnected...".format(a[0], a[1]))

                        # Remove from connections
                        self.connections.remove(conn)

                        # Remove from players
                        self.players.pop(player_id, None)

                        self.disconnections.append(player_id)

                        # Prepare to close connection
                        conn.shutdown(socket.SHUT_RDWR)
                        break

                    try:
                        message = data.decode('UTF-8')
                    except UnicodeDecodeError:
                        # Client sent bytes that are not UTF-8; drop packet.
                        continue
                    message = message.replace('\n', '')

                    if len(message) <= 1:
                        continue

                    # If user has not identified
                    if not player_id:
                        # Strip any illegal input
                        player_id = safe_string(message)

                        if len(player_id) < 1:
                            continue

                        if player_id not in self.players:
                            # New player: start at the default location.
                            coordinate = default_location
                            self.players[player_id] = {
                                'location': coordinate}
                        else:
                            # Known player: resume from stored location.
                            coordinate = (
                                self.players.get(player_id, {}).get(
                                    'location') or default_location)

                        conn.sendall(
                            str.encode("auth-success,{},{}".format(
                                player_id, coordinate)))

                    else:
                        for msg in message.split(';'):
                            arr = msg.split(',')

                            try:
                                # Handle chat message
                                if arr[0] == 'chat':
                                    self.chat.append(
                                        player_id[:5] + ": " + arr[1] + ";")

                                # Handle position update
                                if arr[0] == 'position':
                                    # Position
                                    px = float(arr[1])
                                    py = float(arr[2])
                                    pz = float(arr[3])

                                    # Rotation
                                    # NOTE(review): arr[4] is skipped on
                                    # purpose to mirror the original
                                    # indices -- confirm the wire format.
                                    rx = float(arr[5])
                                    ry = float(arr[6])
                                    rz = float(arr[7])
                            except (IndexError, ValueError):
                                # Missing or non-numeric field: ignore
                                # this message, keep the connection alive.
                                continue

                            if arr[0] == 'position':
                                self.players[player_id][
                                    'location'] = '{},{},{},{},{},{};'.format(
                                        px, py, pz, rx, ry, rz)
                                conn.sendall(str.encode("update-success"))

                except socket.error:
                    # Any socket failure ends this client's session.
                    break
        finally:
            conn.close()