def recognise_features_integration(eval_features_path, out_dir, model_path):
    """Run the trained feature-integration model on packed evaluation
    features and write the predicted probability matrix to csv.

    Args:
      eval_features_path: hdf5 file of packed evaluation features.
      out_dir: output csv path (parent directory is created if missing).
      model_path: saved Keras model to load.
    """
    (te_x, _, te_na_list) = load_hdf5_data(eval_features_path, verbose=1)

    model = load_model(model_path)  # Audio tagging
    # Note: the model's output column order is used as-is; no label
    # reindexing is applied here (the unused index computation was removed).
    fusion_at = model.predict(te_x)

    create_folder(os.path.dirname(out_dir))
    io_task4.at_write_prob_mat_to_csv(na_list=te_na_list, prob_mat=fusion_at, out_path=out_dir)
def train_features_integration_layer(train_features_path, test_features_path, model_path):
    """Train an MLP that fuses packed audio+video feature vectors into
    17 independent tag probabilities, checkpointing the best model by
    validation loss.

    Args:
      train_features_path: hdf5 file with 'x'/'y' training datasets.
      test_features_path: hdf5 file with 'x'/'y' validation datasets.
      model_path: destination for the ModelCheckpoint best model.
    """
    # Open read-only: nothing in this function writes to the hdf5 files
    # (the previous 'r+' requested write access it never used).
    tr_data = h5py.File(train_features_path, 'r')
    te_data = h5py.File(test_features_path, 'r')

    labels = meta.get_train_labels_list()
    # Keyword arguments: modern scikit-learn rejects the positional form.
    class_weights = compute_class_weight(class_weight='balanced',
                                         classes=np.unique(labels), y=labels)

    batch_size = 64
    epochs = 200

    create_folder(os.path.dirname(model_path))
    mc_top = ModelCheckpoint(model_path, monitor='val_loss', verbose=1, save_best_only=True,
                             save_weights_only=False, mode='auto', period=1)

    input_shape = tr_data['x'].shape[1:]

    # Tanh blocks with batch-norm and heavy dropout; sigmoid output for
    # 17 multi-label tags.
    model = Sequential([
        BatchNormalization(input_shape=input_shape),
        Dropout(0.5),
        Dense(1024, activation='tanh'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(512, activation='tanh'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(256, activation='tanh'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(128, activation='tanh'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(17, activation='sigmoid')
    ])
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    gen = RatioDataGenerator(batch_size=batch_size, type='train')

    # NOTE(review): Keras documents class_weight as a dict; confirm this
    # Keras version accepts the ndarray returned by compute_class_weight.
    model.fit_generator(generator=gen.generate(tr_data),
                        steps_per_epoch=100,
                        epochs=epochs,
                        verbose=1,
                        callbacks=[mc_top],
                        validation_data=(te_data['x'], te_data['y']),
                        class_weight=class_weights)
def recognise_probabilities_integration(audio_eval_outputs, visual_eval_outputs, out_dir, model_path):
    """Fuse audio-only and visual-only probability csvs with the trained
    integration model and write the fused probability matrix to csv.

    Args:
      audio_eval_outputs: csv of audio-stream tag probabilities.
      visual_eval_outputs: csv of visual-stream tag probabilities.
      out_dir: output csv path (parent directory is created if missing).
      model_path: saved Keras integration model.
    """
    audio_names, audio_probs = at_read_prob_mat_csv(audio_eval_outputs)
    visual_names, visual_probs = at_read_prob_mat_csv(visual_eval_outputs)

    # Align both matrices to a common clip ordering.
    na_list, audio_probs, visual_probs = reorder_matrices(
        audio_names, audio_probs, visual_names, visual_probs)

    stacked = np.hstack((audio_probs, visual_probs))

    model = load_model(model_path)  # Audio tagging

    # Map the model's (lowercase-sorted) output columns back to the
    # canonical config.labels order.
    sorted_labels = sorted(config.labels, key=str.lower)
    labels_indices = [sorted_labels.index(label) for label in config.labels]
    fusion_at = model.predict(stacked)[:, labels_indices]

    create_folder(os.path.dirname(out_dir))
    io_task4.at_write_prob_mat_to_csv(na_list=na_list, prob_mat=fusion_at, out_path=out_dir)
def combine_probabilities_linear(audio_only_matrix_path, visual_only_matrix_path, combined_matrix_output_path, submission_csv_output_path):
    """Fuse audio and visual probability matrices with a weighted geometric
    mean, then write both the fused matrix csv and a thresholded
    submission-format csv.

    Args:
      audio_only_matrix_path: csv of audio-stream tag probabilities.
      visual_only_matrix_path: csv of visual-stream tag probabilities.
      combined_matrix_output_path: destination csv for the fused matrix.
      submission_csv_output_path: destination csv in submission format.
    """
    for output_path in (combined_matrix_output_path, submission_csv_output_path):
        create_folder(os.path.dirname(output_path))

    labels = config.labels
    # One fixed decision threshold per class.
    threshold_array = [0.30] * len(labels)

    audio_names, audio_probs = at_read_prob_mat_csv(audio_only_matrix_path)
    visual_names, visual_probs = at_read_prob_mat_csv(visual_only_matrix_path)

    # Align both matrices to a common clip ordering.
    na_list, audio_probs, visual_probs = reorder_matrices(
        audio_names, audio_probs, visual_names, visual_probs)

    # Weighted geometric mean in log space; alpha weights the audio stream.
    alpha = 0.93
    fused = np.exp((1 - alpha) * np.log(visual_probs) + alpha * np.log(audio_probs))

    # sed_write_prob_mat_list_to_csv expects one (1, n_classes) matrix per clip.
    n_clips, n_classes = fused.shape
    fused = fused.reshape((n_clips, 1, n_classes))

    # Write combined matrix to csv file
    io_task4.sed_write_prob_mat_list_to_csv(
        na_list=na_list,
        prob_mat_list=fused,
        out_path=combined_matrix_output_path)

    # Write AT to submission format
    io_task4.at_write_prob_mat_csv_to_submission_csv(
        at_prob_mat_path=combined_matrix_output_path,
        lbs=labels,
        thres_ary=threshold_array,
        out_path=submission_csv_output_path)
def train_probabilities_integration_layer(audio_train_outputs, audio_test_ouputs, visual_train_outputs, visual_test_outputs, model_path):
    """Train an MLP that fuses per-stream tag probabilities (audio + visual,
    concatenated per clip) into 17 tag probabilities.

    Bug fixes versus the previous version:
      * the reordered visual TRAIN matrix was unpacked into the file-list
        variable, so the un-reordered visual matrix was hstacked against the
        reordered audio matrix (row misalignment);
      * the TEST reorder call passed visual args first but unpacked audio
        first, swapping the streams so test columns were (visual, audio)
        while training columns were (audio, visual).

    Args:
      audio_train_outputs / audio_test_ouputs: audio-stream probability csvs.
      visual_train_outputs / visual_test_outputs: visual-stream probability csvs.
      model_path: destination for the ModelCheckpoint best model.
    """
    audio_train_predictions_file_list, audio_train_predictions_probability_matrix = at_read_prob_mat_csv(audio_train_outputs)
    audio_test_predictions_file_list, audio_test_predictions_probability_matrix = at_read_prob_mat_csv(audio_test_ouputs)

    visual_train_predictions_file_list, visual_train_predictions_probability_matrix = at_read_prob_mat_csv(visual_train_outputs)
    visual_test_predictions_file_list, visual_test_predictions_probability_matrix = at_read_prob_mat_csv(visual_test_outputs)

    # Align both train matrices to a common clip ordering (audio first,
    # visual second — the same order used for the hstack below).
    train_na_list, audio_train_predictions_probability_matrix, visual_train_predictions_probability_matrix = reorder_matrices(
        audio_train_predictions_file_list, audio_train_predictions_probability_matrix,
        visual_train_predictions_file_list, visual_train_predictions_probability_matrix)

    test_na_list, audio_test_predictions_probability_matrix, visual_test_predictions_probability_matrix = reorder_matrices(
        audio_test_predictions_file_list, audio_test_predictions_probability_matrix,
        visual_test_predictions_file_list, visual_test_predictions_probability_matrix)

    # Column order must match between train and test: (audio, visual).
    train_predictions_matrix = np.hstack((audio_train_predictions_probability_matrix, visual_train_predictions_probability_matrix))
    test_predictions_matrix = np.hstack((audio_test_predictions_probability_matrix, visual_test_predictions_probability_matrix))

    # Load in  labels
    train_labels = np.asarray(meta.get_labels(train_na_list, "metadata/training_set.csv"))
    test_labels = np.asarray(meta.get_labels(test_na_list, "metadata/testing_set.csv"))

    labels = meta.get_train_labels_list()
    # Keyword arguments: modern scikit-learn rejects the positional form.
    class_weights = compute_class_weight(class_weight='balanced',
                                         classes=np.unique(labels), y=labels)

    batch_size = 32
    epochs = 50

    create_folder(os.path.dirname(model_path))
    mc_top = ModelCheckpoint(model_path, monitor='val_loss', verbose=1, save_best_only=True,
                             save_weights_only=False, mode='auto', period=1)

    input_shape = train_predictions_matrix.shape[1:]

    # Small tanh MLP with batch-norm/dropout; sigmoid output for 17
    # multi-label tags.
    model = Sequential([
        Dense(256, activation='tanh', input_shape=input_shape),
        BatchNormalization(),
        Dropout(0.5),
        Dense(128, activation='tanh'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(64, activation='tanh'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(32, activation='tanh'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(17, activation='sigmoid')
    ])
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    gen = RatioDataGenerator(batch_size=batch_size, type='train')

    # NOTE(review): Keras documents class_weight as a dict; confirm this
    # Keras version accepts the ndarray returned by compute_class_weight.
    model.fit_generator(generator=gen.generate({'x': train_predictions_matrix, 'y': train_labels}),
                        steps_per_epoch=100,
                        epochs=epochs,
                        verbose=1,
                        callbacks=[mc_top],
                        validation_data=(test_predictions_matrix, test_labels),
                        class_weight=class_weights)
def pack_features(audio_train_outputs, video_feature_dir, csv_path, out_path):
    """Pack per-clip fused vectors (audio tag probabilities + flattened video
    feature) into an hdf5 file with datasets 'x', 'y' and 'na_list'.

    Args:
      audio_train_outputs: csv of audio-tagging probabilities, one row per clip.
      video_feature_dir: directory of per-clip pickled video feature arrays.
      csv_path: ground-truth csv of (id, start, end, labels, label_ids) rows,
        or "" to pack unlabeled dev. data from video_feature_dir instead.
      out_path: destination hdf5 file; parent directory is created if needed.
    """
    create_folder(os.path.dirname(out_path))

    audio_predictions_file_list, audio_predictions_probability_matrix = at_read_prob_mat_csv(audio_train_outputs)
    y_all, na_all = [], []  # (removed unused x_all accumulator)

    with h5py.File(out_path, 'w') as hf:
        # Fused vector width is hard-coded to 1017.
        # TODO confirm: assumed to equal n_audio_classes + video feature length.
        x_dset = hf.create_dataset('x', (1, 1017), maxshape=(None, 1017), dtype='f', chunks=(1, 1017))
        count = 0

        if csv_path != "":
            with open(csv_path, 'rt') as f:
                reader = csv.reader(f)
                lis = list(reader)

            for li in lis:
                # clip_id/_labels renamed from id/labels to avoid shadowing
                # the builtin and the outer name; _labels is unused here.
                [clip_id, start, end, _labels, label_ids] = li
                if count % 100 == 0: print(count)

                filename = 'Y' + clip_id + '_' + start + '_' + end  # Correspond to the wav name.
                feature_filename = filename + ".pkl"
                audio_filename = filename[1:] + ".wav"

                audio_feature_index = audio_predictions_file_list.index(audio_filename) if audio_filename in audio_predictions_file_list else None
                video_feature_path = os.path.join(video_feature_dir, feature_filename)

                if audio_feature_index is None or not os.path.isfile(video_feature_path):
                    print("File %s is in the csv file but the feature is not extracted!" % filename)
                else:
                    na_all.append(audio_filename)

                    x_audio = audio_predictions_probability_matrix[audio_feature_index]
                    # Context manager so the pickle file handle is not leaked.
                    with open(video_feature_path, 'rb') as vf:
                        x_video = pickle.load(vf)
                    x_video = x_video.reshape(x_video.shape[1])
                    x = np.hstack((x_audio, x_video))

                    # Write into the last (pre-grown) row, then grow by one
                    # unless this is the final csv row.
                    # NOTE(review): if trailing rows are skipped, the dataset
                    # keeps one extra zero row — preserved from the original.
                    x_dset[-1] = x.astype(np.float32)

                    if count != (len(lis) - 1):
                        x_dset.resize(x_dset.shape[0] + 1, axis=0)

                    label_ids = label_ids.split(',')
                    y = ids_to_multinomial(label_ids)
                    y_all.append(y)
                count += 1
        else:  # Pack from features without ground truth label (dev. data)
            names = sorted(os.listdir(video_feature_dir))

            for feature_filename in names:
                filename = os.path.splitext(feature_filename)[0]
                audio_filename = filename[1:] + ".wav"

                audio_feature_index = audio_predictions_file_list.index(audio_filename) if audio_filename in audio_predictions_file_list else None
                video_feature_path = os.path.join(video_feature_dir, feature_filename)

                if audio_feature_index is None or not os.path.isfile(video_feature_path):
                    print("File %s is in the csv file but the feature is not extracted!" % filename)
                else:
                    na_all.append(audio_filename)

                    x_audio = audio_predictions_probability_matrix[audio_feature_index]
                    with open(video_feature_path, 'rb') as vf:
                        x_video = pickle.load(vf)
                    x_video = x_video.reshape(x_video.shape[1])
                    x = np.hstack((x_audio, x_video))

                    x_dset[-1] = x.astype(np.float32)

                    if count != (len(names) - 1):
                        x_dset.resize(x_dset.shape[0] + 1, axis=0)

                    # No ground truth: placeholder entries become False below.
                    y_all.append(None)
                    count += 1

        # np.bool was removed in NumPy 1.24; the builtin bool is equivalent.
        y_all = np.array(y_all, dtype=bool)
        hf.create_dataset('y', data=y_all)

        na_all = [x.encode('utf-8') for x in na_all]  # convert to utf-8 to store
        hf.create_dataset('na_list', data=na_all)