Exemplo n.º 1
0
def test_convert_pathlib():
    """Check ``convert`` on a path-like input when no output path is given.

    A random fixture and a random output ``FileType`` are chosen. ``write``
    inside ``word_vectors.convert_module`` is patched so nothing is written;
    the test then checks that the first ``write`` call received the input
    path with its extension swapped for ``str(output_type)``, the same
    vocabulary and vectors that ``read`` returns, and the requested type.
    """
    fixture_name = random.choice([GLOVE, W2V, W2V_TEXT, LEADER])
    target_type = random.choice(list(FileType))
    source = DATA / fixture_name
    # Expected default output: input path minus its extension, plus the type name.
    stem, _ = os.path.splitext(str(source))
    expected_path = ".".join([stem, str(target_type)])
    with patch("word_vectors.convert_module.write") as mock_write:
        vocab, vectors = read(source)
        convert(source, output_file_type=target_type)
        # Positional arguments of the first call made to the patched write.
        first_call_args = mock_write.call_args_list[0][0]
        written_path, written_vocab, written_vectors, written_type = first_call_args
        assert str(written_path) == expected_path
        assert written_vocab == vocab
        np.testing.assert_allclose(written_vectors, vectors)
        assert written_type is target_type
Exemplo n.º 2
0
def test_convert_with_output_pathlib():
    """Check ``convert`` when the output is given as a ``pathlib.Path``.

    ``write`` inside ``word_vectors.convert_module`` is patched; the test
    checks that its first call received the exact output path passed to
    ``convert``, the vocabulary and vectors that ``read`` returns for the
    input, and the requested output type.
    """
    fixture_name = random.choice([GLOVE, W2V, W2V_TEXT, LEADER])
    target_type = random.choice(list(FileType))
    destination = pathlib.Path(rand_str())
    source = DATA / fixture_name
    with patch("word_vectors.convert_module.write") as mock_write:
        vocab, vectors = read(source)
        convert(source, destination, output_file_type=target_type)
        # Positional arguments of the first call made to the patched write.
        first_call_args = mock_write.call_args_list[0][0]
        written_path, written_vocab, written_vectors, written_type = first_call_args
        assert written_path == destination
        assert written_vocab == vocab
        assert written_type == target_type
        np.testing.assert_allclose(written_vectors, vectors)
Exemplo n.º 3
0
def test_convert_with_input_pathlib():
    """Check that ``convert`` forwards an explicit input file type to ``read``.

    Both ``read`` and ``write`` inside ``word_vectors.convert_module`` are
    patched. The patched ``read`` is primed with the data the test-level
    ``read`` returns for the fixture; the test then checks that it was called
    exactly once with the input path and the fixture's type from
    ``INPUT_MAPPING``, and that ``write`` received the output path, data, and
    output type.
    """
    fixture_name = random.choice([GLOVE, W2V, W2V_TEXT, LEADER])
    source_type = INPUT_MAPPING[fixture_name]
    target_type = random.choice(list(FileType))
    source = DATA / fixture_name
    destination = pathlib.Path(rand_str())
    with patch("word_vectors.convert_module.read") as mock_read, patch(
        "word_vectors.convert_module.write"
    ) as mock_write:
        # Load the fixture with the test module's own ``read`` name and hand
        # the result to the patched reader that ``convert`` will call.
        vocab, vectors = read(source)
        mock_read.return_value = (vocab, vectors)
        convert(
            source,
            destination,
            output_file_type=target_type,
            input_file_type=source_type,
        )
        mock_read.assert_called_once_with(source, source_type)
        # Positional arguments of the first call made to the patched write.
        first_call_args = mock_write.call_args_list[0][0]
        written_path, written_vocab, written_vectors, written_type = first_call_args
        assert written_path == destination
        assert written_vocab == vocab
        assert written_type == target_type
        np.testing.assert_allclose(written_vectors, vectors)
Exemplo n.º 4
0
def main():
    """Command-line entry point: parse arguments and run ``convert``.

    Accepted arguments:
      * ``embeddings`` (positional): passed to ``convert`` as the input.
      * ``--output-format`` / ``--output_format``: parsed with
        ``FileType.from_string``; defaults to ``FileType.LEADER``.
      * ``--input-format`` / ``--input_format``: parsed with
        ``FileType.from_string``; ``None`` when omitted.
      * ``--output``: the output path; ``None`` when omitted.
    """
    cli = argparse.ArgumentParser(description="Convert Pre-trained embeddings between different formats")
    cli.add_argument("embeddings")
    # The hyphenated spelling comes first so argparse derives the dest
    # ``output_format`` / ``input_format`` from it; the underscore form is an alias.
    cli.add_argument(
        "--output-format",
        "--output_format",
        type=FileType.from_string,
        default=FileType.LEADER,
    )
    cli.add_argument(
        "--input-format",
        "--input_format",
        type=FileType.from_string,
    )
    cli.add_argument("--output", help="The output path.")
    options = cli.parse_args()

    conversion_kwargs = {
        "output": options.output,
        "output_file_type": options.output_format,
        "input_file_type": options.input_format,
    }
    convert(options.embeddings, **conversion_kwargs)
Exemplo n.º 5
0
def test_convert_with_output_open():
    """Check ``convert`` when both input and output are already-open files.

    The input fixture and the output file are opened in text or binary mode
    depending on the chosen formats. ``write`` inside
    ``word_vectors.convert_module`` is patched; the test checks that its
    first call received the open output handle, the vocabulary and vectors
    that ``read`` returns, and the requested output type.

    The temporary output file is always cleaned up by path. The paths and the
    file handles use separate names so that cleanup still works (and does not
    mask the real error) if opening either file fails.
    """
    data = random.choice([GLOVE, W2V, W2V_TEXT, LEADER])
    output_type = random.choice(list(FileType))
    output_path = rand_str()
    input_path = DATA / data
    print(output_path)
    # Text formats are opened in text mode, everything else in binary mode.
    input_mode = "r" if data in (GLOVE, W2V_TEXT) else "rb"
    output_mode = "w" if output_type in (FileType.GLOVE, FileType.W2V_TEXT) else "wb"
    try:
        with open(input_path, input_mode) as input_file:
            with open(output_path, output_mode) as output_file:
                with patch("word_vectors.convert_module.write") as write_patch:
                    # NOTE(review): the same handle is given to read() and then
                    # to convert(); this presumably relies on those functions
                    # handling the file position themselves -- confirm.
                    w, wv = read(input_file)
                    convert(input_file, output_file, output_file_type=output_type)
                    call_file, call_w, call_wv, call_type = write_patch.call_args_list[0][0]
                    assert call_file == output_file
                    assert call_w == w
                    assert call_type == output_type
                    np.testing.assert_allclose(call_wv, wv)
    finally:
        # The output file only exists if the second open() succeeded.
        if os.path.exists(output_path):
            os.remove(output_path)