def predict_bounding_boxes(model_filename):
    with tf.Graph().as_default():
        net = network.Network()
        model = net.get_model(384, 384)
        model.load(model_filename)

        annotations = data.load_annotations()
        filepaths = data.create_image_list(annotations)
        input_vectors, labels = data.get_resized_input_data(filepaths, annotations)

        predicted_annotations = {}
        for imagename, input_vector in zip(filepaths, input_vectors):
            predictions = model.predict([input_vector])[0]
            # Round to integer box coordinates; the raw predictions are
            # numpy floats, which json.dump cannot serialize.
            x = round(predictions[0])
            width = round(predictions[1])
            y = round(predictions[2])
            height = round(predictions[3])
            predicted_annotations[imagename] = [x, width, y, height]

        with open(predicted_annotations_path, 'w') as data_file:
            json.dump(predicted_annotations, data_file)
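
# Hypothetical usage sketch: the model filename follows the one used in
# run_inference_on_image() below, and predicted_annotations_path is assumed
# to be a module-level path, as in evaluate_classifier(). It runs the
# localizer, then reads the boxes back the same way the evaluator does.
import json

predict_bounding_boxes('localize_network.net')
with open(predicted_annotations_path) as f:
    boxes = json.load(f)
for imagename, (x, width, y, height) in list(boxes.items())[:3]:
    print(imagename, x, width, y, height)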
def run_inference_on_image():
    net = network.Network()
    model = net.get_model(384, 384)
    model.load('localize_network.net')

    annotations = data.load_annotations()
    filepaths = data.create_image_list(annotations)
    input_vectors, labels = data.get_resized_input_data(filepaths, annotations)

    fig = plt.figure()
    ax = fig.add_subplot(111, aspect='equal')
    for imagename, input_vector in zip(filepaths, input_vectors):
        image = cv2.imread(os.path.join('train/all', imagename))
        im = np.array(image, dtype=np.uint8)
        ax.imshow(im)

        # Predicted box layout is (x, width, y, height).
        predictions = model.predict([input_vector])[0]
        x = round(predictions[0])
        width = round(predictions[1])
        y = round(predictions[2])
        height = round(predictions[3])

        rect = patches.Rectangle((x, y), width, height,
                                 linewidth=1, edgecolor='r', facecolor='none')
        ax.add_patch(rect)
        plt.pause(0.5)
        plt.cla()
def main():
    # Create the output directory.
    os.makedirs(args.data_dir + '/tfrecord', exist_ok=True)

    df = load_annotations(args.data_dir + '/annotations_final.csv',
                          args.num_audios_per_shard)

    print('Start building the dataset.')
    process_dataset(df, args.sample_rate, args.num_samples)
    print('Done.\n')
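
# Hypothetical argparse wiring that main() assumes; the flag names mirror
# the attributes used above (data_dir, num_audios_per_shard, sample_rate,
# num_samples), but the defaults here are illustrative only.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--data_dir', type=str, default='data')
parser.add_argument('--num_audios_per_shard', type=int, default=100)
parser.add_argument('--sample_rate', type=int, default=22050)
parser.add_argument('--num_samples', type=int, default=59049)
args = parser.parse_args()

if __name__ == '__main__':
    main()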
def extract_excitations(model_or_path, data_dir, num_examples,
                        num_audios_per_shard, out_path):
    if isinstance(model_or_path, str):
        model = load_model(model_or_path)
    else:
        model = model_or_path

    # Prepare inputs.
    segments, label = batch_inputs(
        file_pattern=make_path(data_dir, 'tfrecord',
                               'test-????-of-????.seq.tfrecord'),
        batch_size=1,
        is_training=False,
        is_sequence=True,
        examples_per_shard=num_audios_per_shard,
        num_read_threads=1,
        shard_queue_name='filename_queue',
        example_queue_name='input_queue')

    segments = Input(tensor=tf.squeeze(segments))
    label = Input(tensor=tf.squeeze(label))

    # Create a model to extract excitations from every other dense layer.
    excitations = [
        model.get_layer('dense_' + str(i)).output for i in range(2, 20, 2)
    ]
    model_ex = TFRecordModel(inputs=model.inputs, outputs=excitations)
    model_ex = TFRecordModel(inputs=[segments, label],
                             outputs=model_ex(segments) + [label])

    # Extract excitations; the label rides along as the last output.
    outputs = [
        model_ex.predict_tfrecord(segments) for _ in range(num_examples)
    ]
    exs = [output[:-1] for output in outputs]
    labels = [output[-1] for output in outputs]

    # Average excitations over segments for each song.
    exs = [[ex_depth.squeeze().mean(axis=0) for ex_depth in ex] for ex in exs]
    labels = np.stack(labels)

    # Collect data to create a DataFrame of excitations.
    rows = []
    for ex, label in zip(exs, labels):
        for depth, ex_depth in enumerate(ex):
            row = [ex_depth, depth] + label.tolist()
            rows.append(row)

    # Create the DataFrame and save it as a pickle file.
    tag_names = load_annotations(data_dir + '/annotations_final.csv',
                                 num_audios_per_shard).columns.tolist()[:50]
    df = pd.DataFrame(data=rows, columns=['ex', 'depth'] + tag_names)
    df.to_pickle(out_path)
def evaluate_classifier(model_filename):
    with tf.Graph().as_default():
        classifier_net = classifier.Classifier()
        classification_model = classifier_net.get_model(122, 122)
        classification_model.load(model_filename)

        annotations = data.load_annotations()
        image_list = data.create_image_list(annotations)

        ok = 0
        n = 0
        with open(predicted_annotations_path) as data_file:
            bounding_box_data = json.load(data_file)

        for filepath in image_list:
            x = int(bounding_box_data[filepath][0])
            w = int(bounding_box_data[filepath][1])
            y = int(bounding_box_data[filepath][2])
            h = int(bounding_box_data[filepath][3])

            # Extend the bounding box by 10% on each side, clamping the
            # origin so a negative coordinate cannot wrap the slice around.
            x = max(0, x - int(0.1 * w))
            y = max(0, y - int(0.1 * h))
            w += int(0.2 * w)
            h += int(0.2 * h)

            # cv2.imread returns None for unreadable files, so check the
            # image before slicing (a slice itself is never None).
            image = cv2.imread(data.get_image_path(filepath))
            if image is None:
                continue
            crop = image[y:y + h, x:x + w]
            height, width, _ = crop.shape
            if height == 0 or width == 0:
                continue
            crop = cv2.resize(crop, (122, 122))

            classification = classification_model.predict([crop])[0]
            if data.classes[np.argmax(classification)] == data.get_image_label(filepath):
                ok += 1
            n += 1

        print(ok / n)
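
# Self-contained sketch of the 10%-per-side box expansion performed in
# evaluate_classifier(), with the clamping made explicit; the function name
# and the values below are illustrative, not part of the project.
def expand_box(x, y, w, h, img_w, img_h, margin=0.1):
    x = max(0, x - int(margin * w))          # shift origin left/up by 10%
    y = max(0, y - int(margin * h))
    w = min(img_w - x, w + int(2 * margin * w))  # grow by 10% on each side
    h = min(img_h - y, h + int(2 * margin * h))
    return x, y, w, h

print(expand_box(100, 80, 50, 40, 384, 384))  # -> (95, 76, 60, 48)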
def infer(self):
    self._calculate_thresholds()
    img_ids, labels, confidences, set_thresholds = self.set_data

    # Fuse per-model confidences, then threshold into multi-label predictions.
    fused_confidences = self._combine(confidences)
    vec_preds = fused_confidences > self.thresholds
    preds = vector_to_index_list(vec_preds)

    if self.set_type == 'test':
        global_scores = None
        annotations = load_annotations()
        classes = annotations['train']['classes']
        save_kaggle_submision("ensemble_kaggle_submision.csv", img_ids,
                              preds, classes)
    else:
        global_scores = f1_score(*reduce_stats(
            *multilabel_stats(labels, fused_confidences, self.thresholds)))
        print("Ensemble results for {}. F1: {:.4}, precision: {:.4}, recall: {:.4}"
              .format(self.set_type, *global_scores))

    return img_ids, labels, preds, confidences, global_scores
def infer(self):
    img_ids, labels, confidences, _ = self.set_data
    _, _, _, thresholds = self.thresholds_data

    M = confidences.shape[0]
    assert M % 2 == 1, "Number of models for this modality must be odd"

    # confidences: M x N x L, thresholds: M x L. Each model votes with its
    # own per-label threshold; a label is predicted when a strict majority
    # of the M models vote for it.
    vec_preds_per_model = confidences > thresholds[:, np.newaxis, :]
    vec_preds = vec_preds_per_model.sum(axis=0) > M // 2
    preds = vector_to_index_list(vec_preds)

    if self.set_type == 'test':
        global_scores = None
        annotations = load_annotations()
        classes = annotations['train']['classes']
        save_kaggle_submision("ensemble_kaggle_submision.csv", img_ids,
                              preds, classes)
    else:
        global_scores = f1_score(*reduce_stats(
            *multilabel_stats_from_pred(labels, vec_preds)))
        print("Ensemble results for {}. F1: {:.4}, precision: {:.4}, recall: {:.4}"
              .format(self.set_type, *global_scores))

    return img_ids, labels, preds, confidences, global_scores
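
# Minimal, self-contained demonstration of the majority vote above on toy
# numpy arrays; the shapes (M models, N examples, L labels) match the
# comment in infer(), but the values are made up.
import numpy as np

M, N, L = 3, 2, 4
rng = np.random.default_rng(0)
confidences = rng.random((M, N, L))    # per-model confidences, M x N x L
thresholds = np.full((M, L), 0.5)      # per-model, per-label thresholds

votes = confidences > thresholds[:, np.newaxis, :]  # M x N x L booleans
majority = votes.sum(axis=0) > M // 2               # strict majority per label
print(majority.astype(int))                         # N x L predictions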
def run_sliding_window():
    annotations = data.load_annotations()
    filepaths = data.create_image_list(annotations)
    input_vectors, labels = data.get_resized_input_data(filepaths, annotations)

    fig1 = plt.figure()
    ax1 = fig1.add_subplot(111, aspect='equal')

    window_size_x = 200
    window_size_y = 100

    # Creates node ID --> English string lookup.
    node_lookup = NodeLookup()
    create_graph()

    with tf.Session() as sess:
        for imagename, input_vector in zip(filepaths[:5], input_vectors[:5]):
            image = cv2.imread(data.get_image_path(imagename))
            ax1.imshow(image)
            height, width, _ = image.shape

            # Slide a non-overlapping window over the image and classify
            # each tile with the Inception graph.
            window_x = 0
            window_y = 0
            while (window_x + window_size_x < width
                   and window_y + window_size_y < height):
                sub_image = image[window_y:window_y + window_size_y,
                                  window_x:window_x + window_size_x]
                softmax_tensor = sess.graph.get_tensor_by_name('softmax:0')
                predictions = sess.run(softmax_tensor, {'Cast:0': sub_image})
                predictions = np.squeeze(predictions)

                # Keep only the top-1 prediction and mark the tile if the
                # label looks fish-related.
                top_k = predictions.argsort()[-1:][::-1]
                fish = False
                for node_id in top_k:
                    human_string = node_lookup.id_to_string(node_id)
                    if ('shark' in human_string or 'fish' in human_string
                            or 'whale' in human_string or 'tuna' in human_string):
                        fish = True
                if fish:
                    rect = patches.Rectangle((window_x, window_y),
                                             window_size_x, window_size_y,
                                             linewidth=1, edgecolor='r',
                                             facecolor='none')
                    ax1.add_patch(rect)
                    plt.pause(0.1)

                # Advance by a full window; wrap to the next row at the edge.
                window_x += window_size_x
                if window_x + window_size_x > width:
                    window_x = 0
                    window_y += window_size_y

            plt.pause(0.5)
            plt.cla()
    plt.pause(5)
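
# Sketch of the tile positions run_sliding_window() visits: the window
# advances by its own size, so tiles never overlap. The image dimensions
# below are illustrative.
def window_positions(width, height, win_x=200, win_y=100):
    x, y = 0, 0
    while x + win_x < width and y + win_y < height:
        yield x, y
        x += win_x
        if x + win_x > width:
            x = 0
            y += win_y

print(list(window_positions(640, 360)))  # nine 200x100 tiles in three rows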
import cv2
import data
import utils
import numpy as np
import network
import matplotlib.pyplot as plt
from matplotlib import patches

annotations = data.load_annotations()
images_list = data.create_image_list(annotations)
X, Y = data.get_resized_input_data(images_list, annotations)
X_train, Y_train, X_test, Y_test, test_images = utils.split_data(
    X, Y, 0.1, seed=1337, fpaths=images_list, ret_filepaths=True)
print("Selected dataset shape:", np.shape(X_train))

# Train on that dataset.
net = network.Network()
model = net.get_model()
model.fit(X_train, Y_train, n_epoch=10, shuffle=True,
          validation_set=(X_test, Y_test), show_metric=True, batch_size=2,
          run_id='bounding_box_network')
print("Network has been trained on selected dataset.")
print("Showing results.")

# Evaluate the acquired model. Iterate over the test split (zipping the
# full X, Y with test_images would pair the wrong images with the files).
fig = plt.figure()
ax = fig.add_subplot(111, aspect='equal')
for image_data, annot, test_image in zip(X_test, Y_test, test_images):
    image = cv2.imread(data.get_image_path(test_image))
    ax.imshow(image)
    # The source breaks off here; the lines below follow the drawing loop
    # in run_inference_on_image() above.
    predictions = model.predict([image_data])[0]
    x, width, y, height = (round(v) for v in predictions[:4])
    rect = patches.Rectangle((x, y), width, height,
                             linewidth=1, edgecolor='r', facecolor='none')
    ax.add_patch(rect)
    plt.pause(0.5)
    plt.cla()
import os

import numpy as np
import matplotlib.pyplot as plt

from data import get_data_loader, load_annotations, Imaterialist, class_frequency
from ensemble import LearnersData

annotations = load_annotations()
sets = ['train', 'validation']
data_dir = 'data'
image_datasets, dataloaders = {}, {}

# Per-set label frequencies (subset avoids shadowing the builtin set).
freqs = {}
for subset in sets:
    freq = class_frequency(subset, annotations)
    freqs[subset] = freq
    np.save("freq-{}".format(subset), freq)

model = ("runs/"
         "May24_07-07-00_cs231n-1se_resnext50_32x4d-bs-64-lr0.0006-mom0.9-wd1e-5-cutout4-minscale0.4-rota15-cas"
         "/model_best.pth.tar")  # 0.6556, PW1

# Estimate test-set label frequencies from one model's thresholded
# predictions (confidences > thresholds).
test_data = LearnersData([model], 'test', 'test', False)
preds_LNC = test_data.set_data[2] > test_data.set_data[3]
preds_NC = preds_LNC.reshape(preds_LNC.shape[1], -1)
freq = preds_NC.sum(axis=0) / preds_NC.shape[0]
freqs['test'] = freq
np.save("freq-test", freq)

# Sort every set's frequencies by the test-set ordering so they can be
# compared label by label. The source breaks off inside this loop; the
# body applies the sorting permutation, as the surrounding code implies.
sorting_set = 'test'
sorting = np.argsort(freqs[sorting_set])
sorted_freqs = {}
for subset, freq in freqs.items():
    sorted_freqs[subset] = freq[sorting]