Example #1
                        help="Location of output file")
    parser.add_argument('--batch-size',
                        type=int,
                        help="size of batch",
                        default=32)

    args = parser.parse_args()

    # cap the number of tokens per instance when reading the test data
    MAX_NUM_TOKENS = 250
    test_instances = read_instances(args.data_file_path,
                                    MAX_NUM_TOKENS,
                                    test=True)

    vocabulary_path = os.path.join(args.load_serialization_dir, "vocab.txt")
    vocab_token_to_id, _ = load_vocabulary(vocabulary_path)

    test_instances = index_instances(test_instances, vocab_token_to_id)

    # load config
    config_path = os.path.join(args.load_serialization_dir, "config.json")
    with open(config_path, 'r') as f:
        config = json.load(f)

    # load model
    model = load_pretrained_model(args.load_serialization_dir)

    predict(model, test_instances, args.batch_size, args.prediction_file)

    if args.prediction_file:
        print(f"predictions stored at: {args.prediction_file}")
Example #2
                "vocab_size": min(VOCAB_SIZE, len(vocab_token_to_id)),
                "embedding_dim": args.embedding_dim,
                "num_layers": args.num_layers
            }
            classifier = MainClassifier(**config)
            config["type"] = "main"
        else:
            config = {
                "pretrained_model_path": args.base_model_dir,
                "layer_num": args.layer_num,
                "classes_num": 2
            }
            classifier = ProbingClassifier(**config)
            config["type"] = "probing"

    train_instances = index_instances(train_instances, vocab_token_to_id)
    validation_instances = index_instances(validation_instances,
                                           vocab_token_to_id)

    if args.model_name == "main" and args.pretrained_embedding_file:
        # warm-start the embedding matrix with pretrained GloVe vectors
        embeddings = load_glove_embeddings(args.pretrained_embedding_file,
                                           args.embedding_dim,
                                           vocab_id_to_token)
        classifier._embeddings.assign(tf.convert_to_tensor(embeddings))

    optimizer = optimizers.Adam()  # optimizers is assumed to be tensorflow.keras.optimizers

    save_serialization_dir = os.path.join("serialization_dirs",
                                          args.model_name + args.suffix_name)
    os.makedirs(save_serialization_dir, exist_ok=True)
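A plausible continuation, not shown in the fragment: persist the config and the vocabulary into `save_serialization_dir`, since that is exactly what Example #1's prediction script reads back (`config.json` and `vocab.txt`). This sketch assumes `json` is imported and that `vocab_id_to_token` uses contiguous integer ids:

    # hypothetical continuation: save what the prediction script expects to find
    config_path = os.path.join(save_serialization_dir, "config.json")
    with open(config_path, 'w') as f:
        json.dump(config, f, indent=2)

    vocab_path = os.path.join(save_serialization_dir, "vocab.txt")
    with open(vocab_path, 'w', encoding='utf-8') as f:
        # one token per line, ordered by id (ids assumed contiguous from 0)
        for idx in range(len(vocab_id_to_token)):
            f.write(vocab_id_to_token[idx] + "\n")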
Example #3
    # NOTE: the original snippet starts mid-script; it relies on `import copy`,
    # `import numpy as np`, and the `models`, `vocabs` and `choices` dicts
    # built earlier.
    original_instance = {"text_tokens": "the film performances were awesome".split()}
    updates = ["worst", "okay", "cool"]

    updated_instances = []
    for update in updates:
        updated_instance = copy.deepcopy(original_instance)
        updated_instance["text_tokens"][4] = update  # swap out "awesome"
        updated_instances.append(updated_instance)
    all_instances = [original_instance] + updated_instances

    layer_representations = {}
    for seq2vec_name in choices.keys():
        model = models[seq2vec_name]
        vocab = vocabs[seq2vec_name]
        all_indexed_instances = index_instances(copy.deepcopy(all_instances), vocab)
        # a batch size of 4 puts the original and the three updated instances
        # into a single batch
        batches = generate_batches(all_indexed_instances, 4)
        layer_representations[seq2vec_name] = model(**batches[0],
                                                    training=False)["layer_representations"]

    for seq2vec_name, representations in layer_representations.items():
        representations = np.asarray(representations)
        differences_across_layers = {update: [] for update in updates}
        for layer_num in choices[seq2vec_name]:
            # axis 0 indexes the instances (original sentence first);
            # layer numbers are 1-indexed, hence `layer_num - 1`
            original_representation = representations[0, layer_num - 1, :]
            updated_representations = representations[1:, layer_num - 1, :]
            differences = [float(np.abs(original_representation - updated_representation).sum())
                           for updated_representation in updated_representations]
            for update, difference in zip(updates, differences):
                differences_across_layers[update].append(difference)