Example No. 1
    def samediff_val(normalise=False):
        # Embed validation
        np.random.seed(options_dict["rnd_seed"])
        val_batch_iterator = batching.LabelledIterator(
            val_x, None, val_x.shape[0], False
            )
        labels = [val_labels[i] for i in val_batch_iterator.indices]
        speakers = [val_speakers[i] for i in val_batch_iterator.indices]
        saver = tf.train.Saver()
        with tf.Session() as session:
            saver.restore(session, val_model_fn)
            for batch_x in val_batch_iterator:
                np_z = session.run(
                    [output], feed_dict={x: batch_x}
                    )[0]
                break  # single batch

        embed_dict = {}
        for i, utt_key in enumerate(
                [val_keys[i] for i in val_batch_iterator.indices]):
            embed_dict[utt_key] = np_z[i]

        # Same-different
        if normalise:
            np_z_normalised = (np_z - np_z.mean(axis=0))/np_z.std(axis=0)
            distances = pdist(np_z_normalised, metric="cosine")
        else:
            distances = pdist(np_z, metric="cosine")
        # matches = samediff.generate_matches_array(labels)
        # ap, prb = samediff.average_precision(
        #     distances[matches == True], distances[matches == False]
        #     )
        word_matches = samediff.generate_matches_array(labels)
        speaker_matches = samediff.generate_matches_array(speakers)
        sw_ap, sw_prb, swdp_ap, swdp_prb = samediff.average_precision_swdp(
            distances[np.logical_and(word_matches, speaker_matches)],
            distances[np.logical_and(word_matches, speaker_matches == False)],
            distances[word_matches == False]
            )
        # return [sw_prb, -sw_ap, swdp_prb, -swdp_ap]
        return [swdp_prb, -swdp_ap]
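A minimal, self-contained sketch of the distance/match pairing that samediff_val relies on (toy embeddings and labels; the helper below only mirrors the pair ordering of scipy's pdist and stands in for the project's samediff.generate_matches_array):

import numpy as np
from scipy.spatial.distance import pdist

def toy_matches_array(labels):
    # Condensed boolean vector ordered like pdist's output: pairs (i, j) with i < j.
    return np.array([
        labels[i] == labels[j]
        for i in range(len(labels)) for j in range(i + 1, len(labels))
        ], dtype=bool)

embeddings = np.random.randn(5, 4)            # five toy embeddings of dimension 4
labels = ["yes", "no", "yes", "no", "maybe"]  # toy word labels
distances = pdist(embeddings, metric="cosine")
matches = toy_matches_array(labels)
print("Same-word distances:", distances[matches])
print("Different-word distances:", distances[~matches])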
Example No. 2
def train_siamese_cnn(options_dict):
    """Train and save a Siamese CNN triplets network."""

    # PRELIMINARY

    print(datetime.now())

    # Output directory
    hasher = hashlib.md5(repr(sorted(options_dict.items())).encode("ascii"))
    hash_str = hasher.hexdigest()[:10]
    model_dir = path.join(
        "models", options_dict["train_lang"] + "." + options_dict["train_tag"],
        options_dict["script"], hash_str
        )
    options_dict_fn = path.join(model_dir, "options_dict.pkl")
    print("Model directory:", model_dir)
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)
    print("Options:", options_dict)

    # Random seeds
    random.seed(options_dict["rnd_seed"])
    np.random.seed(options_dict["rnd_seed"])
    tf.set_random_seed(options_dict["rnd_seed"])


    # LOAD AND FORMAT DATA

    # Training data
    train_tag = options_dict["train_tag"]
    npz_fn = path.join(
        "data", options_dict["train_lang"], "train." + train_tag + ".npz"
        )
    train_x, train_labels, train_lengths, train_keys, train_speakers = (
        data_io.load_data_from_npz(npz_fn, None)
        )

    # Convert training labels to integers
    train_label_set = list(set(train_labels))
    label_to_id = {}
    for i, label in enumerate(sorted(train_label_set)):
        label_to_id[label] = i
    train_y = []
    for label in train_labels:
        train_y.append(label_to_id[label])
    train_y = np.array(train_y, dtype=NP_ITYPE)

    # Validation data
    if options_dict["val_lang"] is not None:
        npz_fn = path.join("data", options_dict["val_lang"], "val.npz")
        val_x, val_labels, val_lengths, val_keys, val_speakers = (
            data_io.load_data_from_npz(npz_fn)
            )

    # Zero-pad sequences
    max_length = options_dict["max_length"]
    print("Limiting length:", max_length)
    train_x, _ = data_io.pad_sequences(train_x, max_length, True)
    train_x = np.transpose(train_x, (0, 2, 1))
    if options_dict["val_lang"] is not None:
        val_x, _ = data_io.pad_sequences(val_x, max_length, True)
        val_x = np.transpose(val_x, (0, 2, 1))
    
    # Dimensionalities
    d_in = train_x.shape[1]*train_x.shape[2]
    input_shape = [-1, train_x.shape[1], train_x.shape[2], 1] 
    # [n_data, height, width, channels]
    options_dict["d_in"] = d_in
    options_dict["input_shape"] = input_shape

    # Flatten data
    train_x = train_x.reshape((-1, d_in))
    if options_dict["val_lang"] is not None:
        val_x = val_x.reshape((-1, d_in))


    # DEFINE MODEL

    print(datetime.now())
    print("Building model")

    # Model filenames
    intermediate_model_fn = path.join(model_dir, "siamese_cnn.tmp.ckpt")
    model_fn = path.join(model_dir, "siamese_cnn.best_val.ckpt")

    # Model graph
    x = tf.placeholder(TF_DTYPE, [None, d_in])
    y = tf.placeholder(TF_ITYPE, [None])
    network_dict = build_siamese_cnn_from_options_dict(x, options_dict)
    output = network_dict["output"]

    # Semi-hard triplet loss
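    # (tf.contrib's triplet_semihard_loss performs semi-hard negative mining
    # within each mini-batch, so integer class labels plus a margin are all
    # that is needed here.)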
    loss = tf.contrib.losses.metric_learning.triplet_semihard_loss(
        labels=y, embeddings=output, margin=options_dict["margin"]
        )
    optimizer = tf.train.AdamOptimizer(
        learning_rate=options_dict["learning_rate"]
        ).minimize(loss)

    # Save options_dict
    options_dict_fn = path.join(model_dir, "options_dict.pkl")
    print("Writing:", options_dict_fn)
    with open(options_dict_fn, "wb") as f:
        pickle.dump(options_dict, f, -1)


    # TRAIN AND VALIDATE

    print(datetime.now())
    print("Training model")

    # Validation function
    def samediff_val(normalise=False):
        # Embed validation
        np.random.seed(options_dict["rnd_seed"])
        val_batch_iterator = batching.LabelledIterator(
            val_x, None, val_x.shape[0], False
            )
        labels = [val_labels[i] for i in val_batch_iterator.indices]
        speakers = [val_speakers[i] for i in val_batch_iterator.indices]
        saver = tf.train.Saver()
        with tf.Session() as session:
            saver.restore(session, val_model_fn)
            for batch_x in val_batch_iterator:
                np_z = session.run(
                    [output], feed_dict={x: batch_x}
                    )[0]
                break  # single batch

        embed_dict = {}
        for i, utt_key in enumerate(
                [val_keys[i] for i in val_batch_iterator.indices]):
            embed_dict[utt_key] = np_z[i]

        # Same-different
        if normalise:
            np_z_normalised = (np_z - np_z.mean(axis=0))/np_z.std(axis=0)
            distances = pdist(np_z_normalised, metric="cosine")
        else:
            distances = pdist(np_z, metric="cosine")
        # matches = samediff.generate_matches_array(labels)
        # ap, prb = samediff.average_precision(
        #     distances[matches == True], distances[matches == False]
        #     )
        word_matches = samediff.generate_matches_array(labels)
        speaker_matches = samediff.generate_matches_array(speakers)
        sw_ap, sw_prb, swdp_ap, swdp_prb = samediff.average_precision_swdp(
            distances[np.logical_and(word_matches, speaker_matches)],
            distances[np.logical_and(word_matches, speaker_matches == False)],
            distances[word_matches == False]
            )
        # return [sw_prb, -sw_ap, swdp_prb, -swdp_ap]
        return [swdp_prb, -swdp_ap]

    # Train Siamese CNN model
    val_model_fn = intermediate_model_fn
    train_batch_iterator = batching.LabelledIterator(
        train_x, train_y, options_dict["batch_size"], shuffle_every_epoch=True
        )
    if options_dict["val_lang"] is None:
        record_dict = training.train_fixed_epochs(
            options_dict["n_epochs"], optimizer, loss, train_batch_iterator,
            [x, y], save_model_fn=intermediate_model_fn,
            )
    else:
        record_dict = training.train_fixed_epochs_external_val(
            options_dict["n_epochs"], optimizer, loss, train_batch_iterator,
            [x, y], samediff_val, save_model_fn=intermediate_model_fn,
            save_best_val_model_fn=model_fn,
            n_val_interval=options_dict["n_val_interval"]
            )

    # Save record
    record_dict_fn = path.join(model_dir, "record_dict.pkl")
    print("Writing:", record_dict_fn)
    with open(record_dict_fn, "wb") as f:
        pickle.dump(record_dict, f, -1)


    # FINAL EXTRINSIC EVALUATION

    if options_dict["val_lang"] is not None:
        print ("Performing final validation")
        if options_dict["extrinsic_usefinal"]:
            val_model_fn = intermediate_model_fn
        else:
            val_model_fn = model_fn
        # sw_prb, sw_ap, swdp_prb, swdp_ap = samediff_val(normalise=False)
        swdp_prb, swdp_ap = samediff_val(normalise=False)
        # sw_ap = -sw_ap
        swdp_ap = -swdp_ap
        swdp_prb_normalised, swdp_ap_normalised = samediff_val(normalise=True)
        # sw_ap_normalised = -sw_ap_normalised
        swdp_ap_normalised = -swdp_ap_normalised
        print("Validation SWDP AP:", swdp_ap)
        print("Validation SWDP AP with normalisation:", swdp_ap_normalised)
        ap_fn = path.join(model_dir, "val_ap.txt")
        print("Writing:", ap_fn)
        with open(ap_fn, "w") as f:
            f.write(str(swdp_ap) + "\n")
            f.write(str(swdp_ap_normalised) + "\n")
        print("Validation model:", val_model_fn)

    print(datetime.now())
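The hash-based model directory naming at the top of train_siamese_cnn can be reproduced in isolation; a minimal sketch with a toy options dictionary (the keys and values below are only illustrative, and the "script" entry is hard-coded for brevity):

import hashlib
from os import path

options_dict = {"train_lang": "english", "train_tag": "gt", "rnd_seed": 1}  # toy options
hash_str = hashlib.md5(
    repr(sorted(options_dict.items())).encode("ascii")
    ).hexdigest()[:10]
model_dir = path.join(
    "models", options_dict["train_lang"] + "." + options_dict["train_tag"],
    "train_siamese_cnn", hash_str
    )
print(model_dir)  # models/english.gt/train_siamese_cnn/<10-character hash>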
Example No. 3
def apply_model(model_fn, subset, batch_size=None):

    # assert language is None  # to-do

    # Load the model options
    model_dir = path.split(model_fn)[0]
    options_dict_fn = path.join(model_dir, "options_dict.pkl")
    print("Reading:", options_dict_fn)
    with open(options_dict_fn, "rb") as f:
        options_dict = pickle.load(f)

    # Load data
    npz_fn = path.join(options_dict["data_dir"], subset + ".npz")
    # if language is not None:
    #     if "buckeye" in npz_fn:
    #         npz_fn = npz_fn.replace("buckeye", language)
    #     elif "xitsonga" in npz_fn:
    #         npz_fn = npz_fn.replace("xitsonga", language)
    x_data, labels, lengths, keys = data_io.load_data_from_npz(npz_fn)

    if "cnn" in options_dict["script"]:

        # Pad and flatten data
        x_data, _ = data_io.pad_sequences(x_data, options_dict["max_length"],
                                          True)
        x_data = np.transpose(x_data, (0, 2, 1))
        x_data = x_data.reshape((-1, options_dict["d_in"]))

        # Build model
        x = tf.placeholder(TF_DTYPE, [None, options_dict["d_in"]])
        model = build_model(x, None, options_dict)

        # Embed data
        if batch_size is None:
            batch_iterator = batching.LabelledIterator(x_data, None,
                                                       x_data.shape[0], False)
            saver = tf.train.Saver()
            with tf.Session() as session:
                saver.restore(session, model_fn)
                for batch_x in batch_iterator:
                    np_z = session.run([model["encoding"]],
                                       feed_dict={x: batch_x})[0]
                    break  # single batch
        else:
            assert False, "need to implement"

    else:  # rnn

        # Truncate and limit dimensionality
        data_io.trunc_and_limit_dim(x_data, lengths, options_dict["n_input"],
                                    options_dict["max_length"])

        # Build model
        x = tf.placeholder(TF_DTYPE, [None, None, options_dict["n_input"]])
        x_lengths = tf.placeholder(TF_ITYPE, [None])
        model = build_model(x, x_lengths, options_dict)

        # Embed data
        if batch_size is None:
            batch_iterator = batching.SimpleIterator(x_data, len(x_data),
                                                     False)
            saver = tf.train.Saver()
            with tf.Session() as session:
                saver.restore(session, model_fn)
                for batch_x_padded, batch_x_lengths in batch_iterator:
                    np_x = batch_x_padded
                    np_x_lengths = batch_x_lengths
                    np_z = session.run([model["encoding"]],
                                       feed_dict={
                                           x: np_x,
                                           x_lengths: np_x_lengths
                                       })[0]
                    break  # single batch
        else:
            batch_iterator = batching.SimpleIterator(x_data, batch_size, False)
            saver = tf.train.Saver()
            with tf.Session() as session:
                saver.restore(session, model_fn)
                np_z = []
                for batch_x_padded, batch_x_lengths in batch_iterator:
                    np_x = batch_x_padded
                    np_x_lengths = batch_x_lengths
                    cur_np_z = session.run([model["encoding"]],
                                           feed_dict={
                                               x: np_x,
                                               x_lengths: np_x_lengths
                                           })[0]
                    print("!", cur_np_z.shape)
                    np_z.append(cur_np_z)
                np_z = np.vstack(np_z)
                print("!", np_z.shape)

    embed_dict = {}
    for i, utt_key in enumerate([keys[i] for i in batch_iterator.indices]):
        embed_dict[utt_key] = np_z[i]

    return embed_dict
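A possible way to call this version of apply_model and store the returned embeddings, following the pickling convention used elsewhere in these examples (the checkpoint path is hypothetical):

import pickle

embed_dict = apply_model(
    "models/cae_rnn/abc123de45/cae_rnn.best_val.ckpt", "val"  # hypothetical checkpoint
    )
with open("val_embeddings.pkl", "wb") as f:
    pickle.dump(embed_dict, f, -1)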
Example No. 4
def apply_model(model_fn, language, subset, segtag):

    # Load the model options
    model_dir = path.split(model_fn)[0]
    options_dict_fn = path.join(model_dir, "options_dict.pkl")
    print("Reading:", options_dict_fn)
    with open(options_dict_fn, "rb") as f:
        options_dict = pickle.load(f)

    # Load data and intervals
    npz_fn = path.join("data", language, subset + ".npz")
    x_data, labels, lengths, keys, speakers = data_io.load_data_from_npz(
        npz_fn
        )
    seglist_fn = path.join(
        "data", language, "search.seglist." + segtag + ".pkl"
        )
    print("Reading:", seglist_fn)
    with open(seglist_fn, "rb") as f:
        seglist_dict = pickle.load(f)
    seglists = [seglist_dict[i] for i in keys]
    print("No. utterances:", len(x_data))
    n_intervals = sum([len(i) for i in seglists])
    print("No. intervals:", n_intervals)

    if "cnn" in options_dict["script"]:

        assert False, "to-do"

        # Pad and flatten data
        x_data, _ = data_io.pad_sequences(
            x_data, options_dict["max_length"], True
            )
        x_data = np.transpose(x_data, (0, 2, 1))
        x_data = x_data.reshape((-1, options_dict["d_in"]))

        # Build model
        x = tf.placeholder(TF_DTYPE, [None, options_dict["d_in"]])
        model = build_model(x, None, options_dict)

        # Embed data
        batch_iterator = batching.LabelledIterator(
            x_data, None, x_data.shape[0], False
            )
        saver = tf.train.Saver()
        with tf.Session() as session:
            saver.restore(session, model_fn)
            for batch_x in batch_iterator:
                np_z = session.run(
                    [model["encoding"]], feed_dict={x: batch_x})[0]
                break  # single batch

    else:  # rnn
        
        # Truncate and limit dimensionality
        data_io.trunc_and_limit_dim(
            x_data, lengths, options_dict["n_input"], None
            )

        class DenseBatchFeedIterator(object):

            def __init__(self, input_sequences, seglists):
                self.input_sequences = input_sequences
                self.n_input = self.input_sequences[0].shape[-1]
                self.seglists = seglists

            def __iter__(self):
                for i_utt in range(len(self.input_sequences)):
                    
                    # Get intervals
                    seglist = self.seglists[i_utt]
                    input_sequence = self.input_sequences[i_utt]

                    # Get segments for intervals
                    segments = []
                    for i, j in seglist:
                        segments.append(input_sequence[i:j, :])

                    batch_x_lengths = [i.shape[0] for i in segments]

                    # Pad to maximum length in batch
                    batch_x_padded = np.zeros(
                        (len(batch_x_lengths), np.max(batch_x_lengths),
                        self.n_input), dtype=NP_DTYPE
                        )
                    for i, length in enumerate(batch_x_lengths):
                        seq = segments[i]
                        batch_x_padded[i, :length, :] = seq

                    yield (batch_x_padded, batch_x_lengths)

        batch_iterator = DenseBatchFeedIterator(x_data, seglists)

        # Build model
        x = tf.placeholder(TF_DTYPE, [None, None, options_dict["n_input"]])
        x_lengths = tf.placeholder(TF_ITYPE, [None])
        model = build_model(x, x_lengths, options_dict)

        # Embed data
        # batch_iterator = batching.SimpleIterator(x_data, len(x_data), False)
        saver = tf.train.Saver()
        n_outputs = 0
        embed_dict = {}
        with tf.Session() as session:
            saver.restore(session, model_fn)
            # print(datetime.now())
            print(
                "Applying model to segments ({} iterations):".format(
                len(x_data))
                )
            for i_batch, (batch_x_padded, batch_x_lengths) in tqdm(
                    enumerate(batch_iterator), total=len(x_data)):
                cur_output = session.run(
                    [model["encoding"]], feed_dict={x: batch_x_padded,
                    x_lengths: batch_x_lengths}
                    )[0]
                utt_key = keys[i_batch]
                seglist = seglists[i_batch]
                embeddings = []
                for i in range(cur_output.shape[0]):
                    embeddings.append(cur_output[i, :])
                    n_outputs += 1
                embed_dict[utt_key] = np.array(embeddings)
            # print(datetime.now())

    print("Processed {} out of {} inputs".format(n_outputs, n_intervals))
    
    return embed_dict
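The per-utterance padding done by DenseBatchFeedIterator can be illustrated on its own; a minimal sketch with toy variable-length segments (NP_DTYPE is assumed to be np.float32):

import numpy as np

segments = [np.ones((3, 2)), np.ones((5, 2)), np.ones((2, 2))]  # toy segments: (frames, features)
batch_x_lengths = [seg.shape[0] for seg in segments]
batch_x_padded = np.zeros(
    (len(segments), max(batch_x_lengths), segments[0].shape[-1]), dtype=np.float32
    )
for i, seg in enumerate(segments):
    batch_x_padded[i, :seg.shape[0], :] = seg
print(batch_x_padded.shape)  # (3, 5, 2)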
Example No. 5
def apply_model(model_fn, npz_fn):

    # Load the model options
    model_dir = path.split(model_fn)[0]
    options_dict_fn = path.join(model_dir, "options_dict.pkl")
    print("Reading:", options_dict_fn)
    with open(options_dict_fn, "rb") as f:
        options_dict = pickle.load(f)

    # Load data
    x_data, labels, lengths, keys, speakers = data_io.load_data_from_npz(
        npz_fn)

    if "cnn" in options_dict["script"]:

        # Pad and flatten data
        x_data, _ = data_io.pad_sequences(x_data, options_dict["max_length"],
                                          True)
        x_data = np.transpose(x_data, (0, 2, 1))
        x_data = x_data.reshape((-1, options_dict["d_in"]))

        # Build model
        x = tf.placeholder(TF_DTYPE, [None, options_dict["d_in"]])
        model = build_model(x, None, options_dict)

        # Embed data
        batch_iterator = batching.LabelledIterator(x_data, None,
                                                   x_data.shape[0], False)
        saver = tf.train.Saver()
        with tf.Session() as session:
            saver.restore(session, model_fn)
            for batch_x in batch_iterator:
                np_z = session.run([model["encoding"]], feed_dict={x:
                                                                   batch_x})[0]
                break  # single batch

    else:  # rnn

        # Truncate and limit dimensionality
        data_io.trunc_and_limit_dim(x_data, lengths, options_dict["n_input"],
                                    options_dict["max_length"])

        # Build model
        x = tf.placeholder(TF_DTYPE, [None, None, options_dict["n_input"]])
        x_lengths = tf.placeholder(TF_ITYPE, [None])
        model = build_model(x, x_lengths, options_dict)

        # Embed data
        batch_iterator = batching.SimpleIterator(x_data, len(x_data), False)
        saver = tf.train.Saver()
        with tf.Session() as session:
            saver.restore(session, model_fn)
            for batch_x_padded, batch_x_lengths in batch_iterator:
                np_x = batch_x_padded
                np_x_lengths = batch_x_lengths
                np_z = session.run([model["encoding"]],
                                   feed_dict={
                                       x: np_x,
                                       x_lengths: np_x_lengths
                                   })[0]
                break  # single batch

    embed_dict = {}
    for i, utt_key in enumerate([keys[i] for i in batch_iterator.indices]):
        embed_dict[utt_key] = np_z[i]

    return embed_dict
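A possible calling pattern for this last variant (both paths below are hypothetical; the returned dictionary maps each utterance key to one embedding vector):

embeddings = apply_model(
    "models/siamese_rnn/abc123de45/siamese_rnn.best_val.ckpt",  # hypothetical checkpoint
    "data/english/test.npz"                                     # hypothetical feature archive
    )
for utt_key in sorted(embeddings)[:5]:
    print(utt_key, embeddings[utt_key].shape)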