def get_train_test(split_ratio=0.6, random_state=42):
    """Load the per-label feature arrays and split them into train/test sets.

    One ``<label>.npy`` file is loaded for every label returned by
    ``get_labels()``; the samples of the i-th label get the class id ``i``.

    Args:
        split_ratio: Fraction of the samples kept for training; the remaining
            ``1 - split_ratio`` is handed to ``train_test_split`` as
            ``test_size``.
        random_state: Seed forwarded to ``train_test_split``.

    Returns:
        The result of ``train_test_split(X, y, ...)`` for the stacked feature
        array ``X`` and the float class-id vector ``y``.
    """
    labels, _, _ = get_labels()

    # Load everything first and stack once: growing X with np.vstack inside
    # the loop re-copies all previously loaded data on every iteration.
    arrays = [np.load(label + '.npy') for label in labels]

    # With a single label the array is used exactly as loaded (no stacking
    # happened in that case before, so its dimensionality is left untouched).
    X = np.vstack(arrays) if len(arrays) > 1 else arrays[0]

    # Class ids are stored as floats, matching a np.zeros-initialised vector.
    y = np.concatenate([
        np.full(arr.shape[0], fill_value=class_id, dtype=float)
        for class_id, arr in enumerate(arrays)
    ])

    # Internal sanity check: exactly one class id per stacked row.
    assert X.shape[0] == len(y)

    return train_test_split(X, y, test_size=(1 - split_ratio),
                            random_state=random_state)
def main(out_file):
    """Write a submission file using the trained systole and diastole nets.

    Args:
        out_file: Output destination passed to
            ``validate.write_nn_submission``.
    """
    # Locals are deliberately not called ``sys`` / ``dia`` so the standard
    # ``sys`` module name is not shadowed inside this function.
    systole_labels, diastole_labels = convert_data.get_labels(LABELS_FILE)

    # list(...) keeps this correct on Python 3, where dict.values() is a view
    # that np.array would wrap as a single 0-d object instead of a 1-D array.
    all_systole = np.array(list(systole_labels.values()))
    all_diastole = np.array(list(diastole_labels.values()))

    # Fallback values derived from the label values (see validate.get_default).
    default_sys = validate.get_default(all_systole)
    default_dia = validate.get_default(all_diastole)

    sys_net = validate.get_net(SYS_DEPLOY_FILE, SYS_MODEL_FILE)
    dia_net = validate.get_net(DIA_DEPLOY_FILE, DIA_MODEL_FILE)

    # Single-argument call form prints identically on Python 2 and 3.
    print('Writing submission')
    validate.write_nn_submission(
        out_file, PROCESSED_VALIDATION_DATA_DIR,
        sys_net, dia_net,
        default_sys, default_dia,
        PREPROC)
def process_all(train_dest_folder, test_dest_folder, data_folder, labels_file,
                n_jobs=-1, use_lv_segmentation=False):
    """Process every study in ``data_folder`` into a train or test folder.

    Args:
        train_dest_folder: Destination for studies assigned to training.
        test_dest_folder: Destination for studies assigned to testing; when
            falsy, every study goes to ``train_dest_folder``.
        data_folder: Folder handed to ``convert_data.get_studies``.
        labels_file: Labels file for ``convert_data.get_labels``; when falsy,
            ``None`` is passed as both systole and diastole labels.
        n_jobs: Worker count forwarded to ``Parallel``.
        use_lv_segmentation: Flag forwarded unchanged to ``_process``.
    """
    random.seed(SEED)
    caffe.set_device(DEVICE_ID)
    caffe.set_mode_gpu()

    systole = diastole = None
    if labels_file:
        systole, diastole = convert_data.get_labels(labels_file)

    studies = sorted(convert_data.get_studies(data_folder))
    n_studies = len(studies)

    # Without a test folder the test share is forced to zero.
    test_fraction = 0.0 if not test_dest_folder else TEST_PROP
    n_test = int(test_fraction * n_studies)

    # One destination per study, shuffled with the seeded RNG so the
    # train/test assignment is reproducible.
    dest_folders = [test_dest_folder] * n_test
    dest_folders += [train_dest_folder] * (n_studies - n_test)
    random.shuffle(dest_folders)

    jobs = (
        delayed(_process)(folder, study_id, study_path,
                          systole, diastole, use_lv_segmentation)
        for folder, (study_id, study_path) in zip(dest_folders, studies)
    )
    Parallel(n_jobs=n_jobs)(jobs)
# plt.draw() # plt.pause(0.001) m = np.abs(f).mean() if m < 0: print('-') else: inp = frames2mfcc(f).reshape(1, 20, 11, 1) # print(get_labels()[0][ # np.argmax(model.predict(inp)) # ]) predicts = model.predict(inp) if np.max(predicts) < 0.8: print('.') else: print(get_labels()[0][np.argmax(predicts)], np.max(predicts)) #print(len(frames)) except KeyboardInterrupt: pass # print("Terminating") # stream.stop_stream() # stream.close() # audio.terminate() # waveFile = wave.open(WAVE_OUTPUT_FILENAME, 'wb') # waveFile.setnchannels(CHANNELS) # waveFile.setsampwidth(audio.get_sample_size(FORMAT)) # waveFile.setframerate(RATE) # waveFile.writeframes(b''.join(frames))
def submission():
    """
    Generate submission file for the trained models.

    Loads the best systole and diastole weights, predicts on the validation
    data, converts the predictions to CDFs, accumulates them per study and
    writes one row per id listed in the sample submission file. Ids whose
    study has no accumulated prediction are filled with the default
    distributions and reported with a ``Miss`` message.
    """
    cdf_fn = vec_to_cdf

    # Locals are not called ``sys`` / ``dia`` to avoid shadowing the standard
    # ``sys`` module name inside this function.
    systole_labels, diastole_labels = convert_data.get_labels(LABELS_FILE)
    # list(...) keeps this correct on Python 3, where dict.values() is a view
    # that np.array would wrap as a single 0-d object.
    all_systole = np.array(list(systole_labels.values()))
    all_diastole = np.array(list(diastole_labels.values()))
    default_sys = validate.get_default(all_systole)
    default_dia = validate.get_default(all_diastole)

    print('Loading and compiling models...')
    model_systole = GET_MODEL(NUM_FRAMES, CROP_SIZE, CROP_SIZE, OUT)
    model_diastole = GET_MODEL(NUM_FRAMES, CROP_SIZE, CROP_SIZE, OUT)

    print('Loading models weights...')
    model_systole.load_weights(SAVE_BEST_WEIGHTS_SYS)
    model_diastole.load_weights(SAVE_BEST_WEIGHTS_DIA)

    print('Loading validation data...')
    X, ids = load_validation_data(X_VAL_FILE, IDS_FILE)

    if PREPROCESS:
        print('Pre-processing images...')
        X = preprocess(X)

    X = center(X, CROP_SIZE)

    batch_size = 32
    print('Predicting on validation data...')
    pred_systole = model_systole.predict(X, batch_size=batch_size, verbose=1)
    pred_diastole = model_diastole.predict(X, batch_size=batch_size, verbose=1)
    print(pred_systole.shape)
    print(pred_diastole.shape)

    # real predictions to CDF
    cdf_pred_systole = cdf_fn(pred_systole)
    cdf_pred_diastole = cdf_fn(pred_diastole)
    print(cdf_pred_systole.shape)
    print(cdf_pred_diastole.shape)

    print('Accumulating results...')
    sub_systole = accumulate_study_results(ids, cdf_pred_systole)
    sub_diastole = accumulate_study_results(ids, cdf_pred_diastole)

    # write to submission file
    print('Writing submission to file...')
    # Context managers close both files even if a row fails to parse; the
    # sample-submission handle was previously never closed at all.
    with open(SAMPLE_SUBMISSION) as sample_file:
        with open(OUT_SUBMISSION, 'w') as out_handle:
            fi = csv.reader(sample_file)
            fo = csv.writer(out_handle, lineterminator='\n')

            # Copy the header row; next() works on Python 2 and 3 alike.
            fo.writerow(next(fi))

            for line in fi:
                idx = line[0]
                # Ids look like "<study>_<target>".
                key, target = idx.split('_')
                key = int(key)
                want_diastole = target == 'Diastole'
                out = [idx]
                if key in sub_systole:
                    predicted = sub_diastole if want_diastole else sub_systole
                    out.extend(list(predicted[key][0]))
                else:
                    # No prediction for this study: fall back to defaults.
                    fallback = default_dia if want_diastole else default_sys
                    out.extend(list(fallback))
                    print('Miss {0}'.format(idx))
                fo.writerow(out)

    print('Done.')
def addBackgroundNoise():
    """Generate samples for every label using the background-noise recording.

    Reads ``BACKGROUND_NOISE_FILE`` with ``wavread``, keeps column 0 of the
    returned sample array, and calls ``generateSamples`` once per label
    returned by ``get_labels(INPUT_DIRECTORY)``.
    """
    # The second and third values returned by wavread are not needed here.
    noise, _fs, _enc = wavread(BACKGROUND_NOISE_FILE)
    # presumably column 0 is the first audio channel -- confirm with wavread
    noise_column = noise[:, 0]

    label_names, _, _ = get_labels(INPUT_DIRECTORY)
    for label_name in label_names:
        generateSamples(label_name, noise_column)
"""Load the saved Keras model and print the predicted label for one recording."""
from keras.models import model_from_yaml
import numpy as np

from mfcc import wav2mfcc
from convert_data import get_labels

OUTPUT_PATH = './output/'

# Read the serialized architecture; the context manager closes the file even
# if reading fails.
with open(OUTPUT_PATH + 'model.yaml', 'r') as yaml_file:
    loaded_model_yaml = yaml_file.read()

model = model_from_yaml(loaded_model_yaml)
model.load_weights(OUTPUT_PATH + 'model.h5')

# FIXME: think about model.compile if needed

# Alternative sample from the data set:
# sample = wav2mfcc('./data/right/0a7c2a8d_nohash_0.wav')
sample = wav2mfcc('./recordings/1525038319.2531478.wav')

# The model is fed a single sample reshaped to (1, 20, 11, 1).
sample_reshaped = sample.reshape(1, 20, 11, 1)

# The first element returned by get_labels() is indexed by the arg-max of the
# prediction to obtain the label to print.
print(get_labels()[0][np.argmax(model.predict(sample_reshaped))])