def predict_model(project, weights, user_files):
    """Predict categories for one image or a directory of images and log them to a CSV.

    One model is built per entry in ``project[weights]`` and every image is
    scored with ``multi_predict`` over those models. The resulting rows are
    written to ``<project['path']>/<project['name']>_<weights>_predictions.csv``;
    rows already present in that file are kept after the new ones.

    Args:
        project: Project configuration mapping. Reads ``img_dim``,
            ``img_size``, ``conv_dim``, ``architecture``,
            ``number_categories``, ``is_final``, ``categories``,
            ``augmentations``, ``path``, ``name`` and ``project[weights]``.
        weights: Key in ``project`` holding the weights to load; it is also
            embedded in the CSV file name.
        user_files: Path to a directory or to a single ``.jpg``/``.jpeg``/
            ``.png`` file. ``~`` is expanded.

    Returns:
        None. Status messages are printed.
    """
    img_dim = project['img_dim'] * project['img_size']
    conv_dim = project['conv_dim'] * project['img_size']
    architecture = project['architecture']

    # Any architecture other than resnet50/xception falls back to Inception v3.
    build_model = {
        'resnet50': get_resnet_final_model,
        'xception': get_xception_final_model,
    }.get(architecture, get_inception_v3_final_model)
    models = [
        build_model(img_dim, conv_dim, project['number_categories'], weight,
                    project['is_final'])
        for weight in project[weights]
    ]

    def predict_row(aug_gen, file_name):
        # One CSV row: weights used, file, winning category, then pred_list
        # (presumably one score per category, matching the CSV columns).
        predicted, pred_list = multi_predict(aug_gen, models, architecture)
        label = project['categories'][np.argmax(predicted)]
        return [project[weights], file_name, label] + pred_list

    output = []
    user_files = os.path.expanduser(user_files)
    if os.path.isdir(user_files):
        for aug_gen, file_name in tqdm(
                gen_from_directory(user_files, img_dim, project)):
            output.append(predict_row(aug_gen, file_name))
    elif user_files.lower().endswith(('.jpg', '.jpeg', '.png')):
        # Case-insensitive suffix match: accepts IMG.JPG and no longer
        # accepts names that merely contain '.jpg' somewhere in the path.
        aug_gen = prep_from_image(user_files, img_dim,
                                  project['augmentations'])
        output.append(predict_row(aug_gen, user_files))
    else:
        print(colored('Should either be a directory or a .jpg, .jpeg, and .png', 'red'))
        return

    if not output:
        print(colored('No image files found.', 'red'))
        return

    columns = ['weights_used', 'file_name', 'predicted'] + project['categories']
    pred_df = pd.DataFrame(output, columns=columns)
    predictions_file = os.path.join(
        project['path'], project['name'] + '_' + weights + '_predictions.csv')
    if os.path.isfile(predictions_file):
        # Keep earlier predictions; the newest rows go first.
        old_pred_df = pd.read_csv(predictions_file)
        pred_df = pd.concat([pred_df, old_pred_df])

    os.makedirs(project['path'], exist_ok=True)
    pred_df.to_csv(predictions_file, index=False)
    print('Predictions saved to:', colored(predictions_file, 'cyan'))
def predict_model(project, weights, user_files):
    """Predict categories for one image or a directory of images and log them to a CSV.

    One model is built per entry in ``project[weights]`` and every image is
    scored with ``multi_predict`` over those models. The resulting rows are
    written to ``<project['path']>/<project['name']>_<weights>_predictions.csv``;
    rows already present in that file are kept after the new ones.

    Args:
        project: Project configuration mapping. Reads ``img_dim``,
            ``img_size``, ``conv_dim``, ``architecture``,
            ``number_categories``, ``is_final``, ``categories``,
            ``augmentations``, ``path``, ``name`` and ``project[weights]``.
        weights: Key in ``project`` holding the weights to load; it is also
            embedded in the CSV file name.
        user_files: Path to a directory or to a single ``.jpg``/``.jpeg``/
            ``.png`` file. ``~`` is expanded.

    Returns:
        None. Status messages are printed.
    """
    img_dim = project['img_dim'] * project['img_size']
    conv_dim = project['conv_dim'] * project['img_size']
    architecture = project['architecture']

    # Any architecture other than resnet50/xception falls back to Inception v3.
    build_model = {
        'resnet50': get_resnet_final_model,
        'xception': get_xception_final_model,
    }.get(architecture, get_inception_v3_final_model)
    models = [
        build_model(img_dim, conv_dim, project['number_categories'], weight,
                    project['is_final'])
        for weight in project[weights]
    ]

    def predict_row(aug_gen, file_name):
        # One CSV row: weights used, file, winning category, then pred_list
        # (presumably one score per category, matching the CSV columns).
        predicted, pred_list = multi_predict(aug_gen, models, architecture)
        label = project['categories'][np.argmax(predicted)]
        return [project[weights], file_name, label] + pred_list

    output = []
    user_files = os.path.expanduser(user_files)
    if os.path.isdir(user_files):
        for aug_gen, file_name in tqdm(
                gen_from_directory(user_files, img_dim, project)):
            output.append(predict_row(aug_gen, file_name))
    elif user_files.lower().endswith(('.jpg', '.jpeg', '.png')):
        # Case-insensitive suffix match: accepts IMG.JPG and no longer
        # accepts names that merely contain '.jpg' somewhere in the path.
        aug_gen = prep_from_image(user_files, img_dim,
                                  project['augmentations'])
        output.append(predict_row(aug_gen, user_files))
    else:
        print(colored('Should either be a directory or a .jpg, .jpeg, and .png', 'red'))
        return

    if not output:
        print(colored('No image files found.', 'red'))
        return

    columns = ['weights_used', 'file_name', 'predicted'] + project['categories']
    pred_df = pd.DataFrame(output, columns=columns)
    predictions_file = os.path.join(
        project['path'], project['name'] + '_' + weights + '_predictions.csv')
    if os.path.isfile(predictions_file):
        # Keep earlier predictions; the newest rows go first.
        old_pred_df = pd.read_csv(predictions_file)
        pred_df = pd.concat([pred_df, old_pred_df])

    # to_csv does not create missing directories, so make sure the project
    # directory exists before writing.
    os.makedirs(project['path'], exist_ok=True)
    pred_df.to_csv(predictions_file, index=False)
    print('Predictions saved to:', colored(predictions_file, 'cyan'))
def train_model(project, final=False, last=False):
    """Train one model per k-fold (or a single model) and record its weights.

    Image arrays are read from ``<project['path']>/augmented`` when ``final``
    is true and from ``<project['path']>/pre_model`` otherwise. Only ``.npy``
    files whose name contains ``-img-`` are used; the text before the first
    ``-`` is parsed as an integer sample index and the fourth ``-``-separated
    field is used as the stratification label. Files sharing an index are
    always kept together in the same split.

    Args:
        project: Project configuration mapping. It is mutated:
            ``model_round`` is incremented and ``number_categories``,
            ``best_weights``, ``last_weights`` and ``is_final`` are set.
        final: Build and train the ``*_final_model`` on the augmented arrays
            instead of the ``*_pre_post_model`` on the pre-model arrays.
        last: Start from ``project['last_weights']`` instead of
            ``project['best_weights']``.

    Returns:
        The updated ``project`` mapping.
    """
    architecture = project['architecture']
    weight_label = '-' + architecture + '-weights-'
    source_path = project['path']
    weights_path = os.path.join(source_path, 'weights')
    plot_path = os.path.join(source_path, 'plots')
    weights = 'last_weights' if last else 'best_weights'

    if final:
        weight_label += '-final-'
        use_path = os.path.join(source_path, 'augmented')
    else:
        use_path = os.path.join(source_path, 'pre_model')

    # NOTE(review): this reset runs for final and non-final training alike and
    # happens before project[weights] is loaded below. This function records
    # those weights as paths inside weights_path, so confirm that resuming
    # from earlier weights is expected to work with this reset in place.
    project['model_round'] += 1
    for directory in (weights_path, plot_path):
        shutil.rmtree(directory, ignore_errors=True)
        os.makedirs(directory)

    img_dim = project['img_dim'] * project['img_size']
    conv_dim = project['conv_dim'] * project['img_size']
    lr = project['learning_rate']
    decay = project['learning_rate_decay']

    # Any architecture other than resnet50/xception falls back to Inception v3.
    build_final_model = {
        'resnet50': get_resnet_final_model,
        'xception': get_xception_final_model,
    }.get(architecture, get_inception_v3_final_model)
    build_pre_post_model = {
        'resnet50': get_resnet_pre_post_model,
        'xception': get_xception_pre_post_model,
    }.get(architecture, get_inception_v3_pre_post_model)
    preprocess_input = {
        'resnet50': resnet_preprocess_input,
        'xception': xception_preprocess_input,
    }.get(architecture, inception_v3_preprocess_input)

    img_files = [
        name for name in os.listdir(use_path)
        if '-img-' in name and name.split('.')[-1] == 'npy'
    ]
    pre_model_files_df = pd.DataFrame({
        'files': img_files,
        'ind': [int(name.split('-')[0]) for name in img_files],
        'label': [name.split('-')[3] for name in img_files],
    })
    # Split on one row per sample index, then map the chosen indices back to
    # every file carrying that index.
    pre_model_files_df_dedup = pre_model_files_df.drop_duplicates(subset='ind')
    pre_model_files_df = pre_model_files_df.set_index(['ind'])

    best_weights = []
    last_weights = []

    if project['kfold'] >= 3:
        kfold = StratifiedKFold(n_splits=project['kfold'],
                                shuffle=True,
                                random_state=project['seed'])
        kfold_generator = kfold.split(pre_model_files_df_dedup,
                                      pre_model_files_df_dedup.label)
        validate = True
    else:
        print('Too few k-folds selected, fitting on all data')
        kfold_generator = no_folds_generator(pre_model_files_df_dedup)
        validate = False

    for fold, (train, test) in enumerate(kfold_generator):
        if project['kfold_every']:
            print('----- Fitting Fold', fold, '-----')
        elif fold > 0:
            # Only the first fold is fitted unless every fold was requested.
            break

        run_label = (project['name'] + weight_label + '-kfold-' + str(fold) +
                     '-round-' + str(project['model_round']))
        weights_name = run_label + '.hdf5'
        plot_name = run_label + '.png'
        weights_file = os.path.join(weights_path, weights_name)
        weights_base = weights_name.split('.')[0]

        if project[weights] is None:
            fold_weights = None
        else:
            fold_weights = project[weights][fold]

        if final:
            model = build_final_model(img_dim, conv_dim,
                                      project['number_categories'],
                                      fold_weights, project['is_final'])
            # A separate loop variable keeps the fold index intact for the
            # checkpoint file name and the plot title further down.
            for layer_idx, layer in enumerate(model.layers[1].layers):
                if len(layer.trainable_weights) > 0:
                    if layer_idx < project['final_cutoff']:
                        mult = 0.01
                    else:
                        mult = 0.1
                    layer.learning_rate_multiplier = [
                        mult for _ in layer.trainable_weights
                    ]
        else:
            _, model = build_pre_post_model(img_dim, conv_dim,
                                            len(project['categories']),
                                            model_weights=fold_weights)

        train_ind = list(set(pre_model_files_df_dedup.iloc[train].ind))
        pre_model_files_train = pre_model_files_df.loc[train_ind]
        gen_train = gen_minibatches(use_path,
                                    pre_model_files_train.files,
                                    project['batch_size'],
                                    architecture,
                                    final=final)
        number_train_samples = len(pre_model_files_train)

        if validate:
            test_ind = list(set(pre_model_files_df_dedup.iloc[test].ind))
            pre_model_files_test = pre_model_files_df.loc[test_ind]
            gen_test = gen_minibatches(use_path,
                                       pre_model_files_test.files,
                                       project['batch_size'],
                                       architecture,
                                       final=final)
            number_test_samples = len(pre_model_files_test)
            validation_steps = number_test_samples // project['batch_size']
            weights_checkpoint_file = (
                weights_base + '-kfold-' + str(fold) +
                "-improvement-{epoch:02d}-{val_categorical_accuracy:.4f}.hdf5")
            checkpoint = ModelCheckpoint(os.path.join(weights_path,
                                                      weights_checkpoint_file),
                                         monitor='val_categorical_accuracy',
                                         verbose=1,
                                         save_best_only=True,
                                         mode='max')
            callbacks_list = [checkpoint]
        else:
            gen_test = None
            validation_steps = None
            callbacks_list = None

        steps_per_epoch = number_train_samples // project['batch_size']
        for j in range(project['rounds']):
            # A fresh optimizer is created and the model recompiled at the
            # start of every round.
            optimizer = Adam(lr=lr, decay=decay)
            model.compile(optimizer=optimizer,
                          loss='categorical_crossentropy',
                          metrics=['categorical_accuracy'])
            model.fit_generator(gen_train,
                                steps_per_epoch=steps_per_epoch,
                                epochs=project['cycle'] * (j + 1),
                                verbose=1,
                                validation_data=gen_test,
                                validation_steps=validation_steps,
                                initial_epoch=j * project['cycle'],
                                callbacks=callbacks_list)

        model.save_weights(weights_file)
        last_weights.append(weights_file)

        # Pick the checkpoint of this fold with the highest validation
        # accuracy. The accuracy is the last '-'-separated field of the file
        # name; it is parsed as a float so 1.0000 ranks above 0.9999.
        best_name = None
        best_val = -1
        for name in os.listdir(weights_path):
            if (weights_base in name and weight_label in name
                    and 'improvement' in name):
                val = float(os.path.splitext(name)[0].rsplit('-', 1)[1])
                if val > best_val:
                    best_val = val
                    best_name = name
        if best_name is None:
            best_file = weights_file
        else:
            best_file = os.path.join(weights_path, best_name)

        if project['plot']:
            print('Plotting confusion matrix')
            if best_name is None:
                print('Loading last weights:', best_file)
            else:
                print('Loading best weights:', best_file)
            model.load_weights(best_file)

            best_predictions = []
            true_labels = []
            print('Predicting test files')
            # Without validation folds the plot is drawn from training files.
            if validate:
                use_files = pre_model_files_test.files
            else:
                use_files = pre_model_files_train.files
            for array_name in tqdm(use_files):
                img_path = os.path.join(use_path, array_name)
                img = np.load(img_path)
                if final:
                    img = np.squeeze(
                        preprocess_input(img[np.newaxis].astype(np.float32)))
                prediction = model.predict(img[np.newaxis])
                best_predictions.append(
                    project['categories'][np.argmax(prediction)])
                true_label = np.load(img_path.replace('-img-', '-label-'))
                true_labels.append(
                    project['categories'][np.argmax(true_label)])

            cm = confusion_matrix(true_labels,
                                  best_predictions,
                                  labels=project['categories'])
            plt.clf()
            sns.heatmap(pd.DataFrame(cm, project['categories'],
                                     project['categories']),
                        annot=True,
                        fmt='g')
            # confusion_matrix rows are true labels and columns predictions,
            # so the heatmap's y-axis is "Actual" and x-axis "Predicted".
            plt.ylabel('Actual')
            plt.xlabel('Predicted')
            plt.xticks(rotation=45, fontsize=8)
            plt.yticks(rotation=45, fontsize=8)
            plt.title('Confusion matrix for fold: ' + str(fold) + '\nweights' +
                      weights_name)
            plt.savefig(os.path.join(plot_path, plot_name))
            print('Confusion matrix plot saved:',
                  colored(os.path.join(plot_path, plot_name), 'magenta'))

        best_weights.append(best_file)

    project['number_categories'] = len(project['categories'])
    project['best_weights'] = best_weights
    project['last_weights'] = last_weights
    project['is_final'] = final
    return project
def start_server(project, weights):
    """Serve predictions for local images over a small Flask REST API.

    One model is built per entry in ``project[weights]``. A ``POST`` to
    ``/predict`` with an ``img_path`` argument returns JSON predictions for
    that image file, or for every image found in that directory. Problems
    (wrong file type, empty directory, missing path) are reported as plain
    text responses. The call blocks in ``app.run`` on
    ``project['api_port']``.

    Args:
        project: Project configuration mapping. Reads ``img_size``,
            ``architecture``, ``number_categories``, ``is_final``,
            ``categories``, ``augmentations``, ``api_port`` and
            ``project[weights]``; ``img_dim`` and ``conv_dim`` are used when
            present.
        weights: Key in ``project`` holding the weights to load.
    """
    app = Flask(__name__)
    api = Api(app)
    parser = reqparse.RequestParser()
    parser.add_argument('img_path', type=str)

    # Use the project's own base dimensions when configured; 224 and 7 are
    # the values that were previously hard-coded here.
    img_dim = project.get('img_dim', 224) * project['img_size']
    conv_dim = project.get('conv_dim', 7) * project['img_size']
    architecture = project['architecture']

    # Any architecture other than resnet50/xception falls back to Inception v3.
    build_model = {
        'resnet50': get_resnet_final_model,
        'xception': get_xception_final_model,
    }.get(architecture, get_inception_v3_final_model)
    models = [
        build_model(img_dim, conv_dim, project['number_categories'], weight,
                    project['is_final'])
        for weight in project[weights]
    ]

    def describe_prediction(aug_gen, image_path):
        # Build the JSON-serialisable result for one image.
        pred_list, predicted = multi_predict(aug_gen, models, architecture)
        return {
            'weights': project[weights],
            'image_path': image_path,
            'predicted': project['categories'][np.argmax(predicted)],
            'classes': project['categories'],
            # Plain floats so jsonify can serialise the values.
            'class_predictions': [[float(p) for p in pred]
                                  for pred in list(pred_list)],
        }

    class Predict(Resource):
        def post(self):
            args = parser.parse_args(strict=True)
            img_path = os.path.expanduser(args['img_path'])

            if os.path.isfile(img_path):
                if not img_path.lower().endswith(('.png', '.jpg', '.jpeg')):
                    return 'File must be a jpeg or png: ' + args['img_path']
                aug_gen = prep_from_image(img_path, img_dim,
                                          project['augmentations'])
                return jsonify(describe_prediction(aug_gen, img_path))

            if os.path.isdir(img_path):
                result = [
                    describe_prediction(aug_gen, file_name)
                    for aug_gen, file_name in gen_from_directory(
                        img_path, img_dim, project)
                ]
                if result:
                    return jsonify(result)
                return 'No images found in directory: ' + args['img_path']

            return 'Image does not exist locally: ' + args['img_path']

    api.add_resource(Predict, '/predict')

    base_url = 'curl http://localhost:' + str(project['api_port'])
    print('')
    print('To predict a local image, simply:')
    print('')
    print(
        colored(
            base_url +
            '/predict -d "img_path=/path/to/your/img.png" -X POST', 'green'))
    print('')
    print('or')
    print('')
    print(
        colored(
            base_url +
            '/predict -d "img_path=/path/to/your/img_dir" -X POST', 'green'))
    print('')
    # Flask documents the port as an integer; coerce in case it is a string.
    app.run(port=int(project['api_port']))
def start_server(project, weights):
    """Serve predictions for local images over a small Flask REST API.

    One model is built per entry in ``project[weights]``. A ``POST`` to
    ``/predict`` with an ``img_path`` argument returns JSON predictions for
    that image file, or for every image found in that directory. Problems
    (wrong file type, empty directory, missing path) are reported as plain
    text responses. The call blocks in ``app.run`` on
    ``project['api_port']``.

    Args:
        project: Project configuration mapping. Reads ``img_size``,
            ``architecture``, ``number_categories``, ``is_final``,
            ``categories``, ``augmentations``, ``api_port`` and
            ``project[weights]``; ``img_dim`` and ``conv_dim`` are used when
            present.
        weights: Key in ``project`` holding the weights to load.
    """
    app = Flask(__name__)
    api = Api(app)
    parser = reqparse.RequestParser()
    parser.add_argument('img_path', type=str)

    # Use the project's own base dimensions when configured; 224 and 7 are
    # the values that were previously hard-coded here.
    img_dim = project.get('img_dim', 224) * project['img_size']
    conv_dim = project.get('conv_dim', 7) * project['img_size']
    architecture = project['architecture']

    # Any architecture other than resnet50/xception falls back to Inception v3.
    build_model = {
        'resnet50': get_resnet_final_model,
        'xception': get_xception_final_model,
    }.get(architecture, get_inception_v3_final_model)
    models = [
        build_model(img_dim, conv_dim, project['number_categories'], weight,
                    project['is_final'])
        for weight in project[weights]
    ]

    def describe_prediction(aug_gen, image_path):
        # Build the JSON-serialisable result for one image.
        pred_list, predicted = multi_predict(aug_gen, models, architecture)
        return {
            'weights': project[weights],
            'image_path': image_path,
            'predicted': project['categories'][np.argmax(predicted)],
            'classes': project['categories'],
            # Plain floats so jsonify can serialise the values.
            'class_predictions': [[float(p) for p in pred]
                                  for pred in list(pred_list)],
        }

    class Predict(Resource):
        def post(self):
            args = parser.parse_args(strict=True)
            img_path = os.path.expanduser(args['img_path'])

            if os.path.isfile(img_path):
                if not img_path.lower().endswith(('.png', '.jpg', '.jpeg')):
                    return 'File must be a jpeg or png: ' + args['img_path']
                aug_gen = prep_from_image(img_path, img_dim,
                                          project['augmentations'])
                return jsonify(describe_prediction(aug_gen, img_path))

            if os.path.isdir(img_path):
                result = [
                    describe_prediction(aug_gen, file_name)
                    for aug_gen, file_name in gen_from_directory(
                        img_path, img_dim, project)
                ]
                if result:
                    return jsonify(result)
                return 'No images found in directory: ' + args['img_path']

            return 'Image does not exist locally: ' + args['img_path']

    api.add_resource(Predict, '/predict')

    base_url = 'curl http://localhost:' + str(project['api_port'])
    print('')
    print('To predict a local image, simply:')
    print('')
    print(colored(base_url + '/predict -d "img_path=/path/to/your/img.png" -X POST', 'green'))
    print('')
    print('or')
    print('')
    print(colored(base_url + '/predict -d "img_path=/path/to/your/img_dir" -X POST', 'green'))
    print('')
    # Flask documents the port as an integer; coerce in case it is a string.
    app.run(port=int(project['api_port']))
def train_model(project, final = False, last = False):
    """Train one model per k-fold (or a single model) and record its weights.

    Image arrays are read from ``<project['path']>/augmented`` when ``final``
    is true and from ``<project['path']>/pre_model`` otherwise. Only ``.npy``
    files whose name contains ``-img-`` are used; the text before the first
    ``-`` is parsed as an integer sample index and the fourth ``-``-separated
    field is used as the stratification label. Files sharing an index are
    always kept together in the same split.

    Args:
        project: Project configuration mapping. It is mutated:
            ``model_round`` is incremented and ``number_categories``,
            ``best_weights``, ``last_weights`` and ``is_final`` are set.
        final: Build and train the ``*_final_model`` on the augmented arrays
            instead of the ``*_pre_post_model`` on the pre-model arrays.
        last: Start from ``project['last_weights']`` instead of
            ``project['best_weights']``.

    Returns:
        The updated ``project`` mapping.
    """
    architecture = project['architecture']
    weight_label = '-' + architecture + '-weights-'
    source_path = project['path']
    weights_path = os.path.join(source_path, 'weights')
    plot_path = os.path.join(source_path, 'plots')
    weights = 'last_weights' if last else 'best_weights'

    if final:
        weight_label += '-final-'
        use_path = os.path.join(source_path, 'augmented')
    else:
        use_path = os.path.join(source_path, 'pre_model')

    # Both output directories are emptied and recreated: the weights
    # directory must exist for save_weights/ModelCheckpoint, and a bare
    # os.makedirs(plot_path) would raise if the plots directory already
    # existed.
    # NOTE(review): this reset runs for final and non-final training alike and
    # happens before project[weights] is loaded below. This function records
    # those weights as paths inside weights_path, so confirm that resuming
    # from earlier weights is expected to work with this reset in place.
    project['model_round'] += 1
    for directory in (weights_path, plot_path):
        shutil.rmtree(directory, ignore_errors=True)
        os.makedirs(directory)

    img_dim = project['img_dim'] * project['img_size']
    conv_dim = project['conv_dim'] * project['img_size']
    lr = project['learning_rate']
    decay = project['learning_rate_decay']

    # Any architecture other than resnet50/xception falls back to Inception v3.
    build_final_model = {
        'resnet50': get_resnet_final_model,
        'xception': get_xception_final_model,
    }.get(architecture, get_inception_v3_final_model)
    build_pre_post_model = {
        'resnet50': get_resnet_pre_post_model,
        'xception': get_xception_pre_post_model,
    }.get(architecture, get_inception_v3_pre_post_model)
    preprocess_input = {
        'resnet50': resnet_preprocess_input,
        'xception': xception_preprocess_input,
    }.get(architecture, inception_v3_preprocess_input)

    img_files = [
        name for name in os.listdir(use_path)
        if '-img-' in name and name.split('.')[-1] == 'npy'
    ]
    pre_model_files_df = pd.DataFrame({
        'files': img_files,
        'ind': [int(name.split('-')[0]) for name in img_files],
        'label': [name.split('-')[3] for name in img_files],
    })
    # Split on one row per sample index, then map the chosen indices back to
    # every file carrying that index.
    pre_model_files_df_dedup = pre_model_files_df.drop_duplicates(subset='ind')
    pre_model_files_df = pre_model_files_df.set_index(['ind'])

    best_weights = []
    last_weights = []

    if project['kfold'] >= 3:
        kfold = StratifiedKFold(n_splits=project['kfold'], shuffle=True, random_state=project['seed'])
        kfold_generator = kfold.split(pre_model_files_df_dedup, pre_model_files_df_dedup.label)
        validate = True
    else:
        print('Too few k-folds selected, fitting on all data')
        kfold_generator = no_folds_generator(pre_model_files_df_dedup)
        validate = False

    for fold, (train, test) in enumerate(kfold_generator):
        if project['kfold_every']:
            print('----- Fitting Fold', fold, '-----')
        elif fold > 0:
            # Only the first fold is fitted unless every fold was requested.
            break

        run_label = (project['name'] + weight_label + '-kfold-' + str(fold) +
                     '-round-' + str(project['model_round']))
        weights_name = run_label + '.hdf5'
        plot_name = run_label + '.png'
        weights_file = os.path.join(weights_path, weights_name)
        weights_base = weights_name.split('.')[0]

        if project[weights] is None:
            fold_weights = None
        else:
            fold_weights = project[weights][fold]

        if final:
            model = build_final_model(img_dim, conv_dim, project['number_categories'], fold_weights, project['is_final'])
            # A separate loop variable keeps the fold index intact for the
            # checkpoint file name and the plot title further down.
            for layer_idx, layer in enumerate(model.layers[1].layers):
                if len(layer.trainable_weights) > 0:
                    if layer_idx < project['final_cutoff']:
                        mult = 0.01
                    else:
                        mult = 0.1
                    layer.learning_rate_multiplier = [mult for _ in layer.trainable_weights]
        else:
            _, model = build_pre_post_model(img_dim, conv_dim, len(project['categories']), model_weights=fold_weights)

        train_ind = list(set(pre_model_files_df_dedup.iloc[train].ind))
        pre_model_files_train = pre_model_files_df.loc[train_ind]
        gen_train = gen_minibatches(use_path, pre_model_files_train.files, project['batch_size'], architecture, final=final)
        number_train_samples = len(pre_model_files_train)

        if validate:
            test_ind = list(set(pre_model_files_df_dedup.iloc[test].ind))
            pre_model_files_test = pre_model_files_df.loc[test_ind]
            gen_test = gen_minibatches(use_path, pre_model_files_test.files, project['batch_size'], architecture, final=final)
            number_test_samples = len(pre_model_files_test)
            validation_steps = number_test_samples // project['batch_size']
            weights_checkpoint_file = (
                weights_base + '-kfold-' + str(fold) +
                "-improvement-{epoch:02d}-{val_categorical_accuracy:.4f}.hdf5")
            checkpoint = ModelCheckpoint(os.path.join(weights_path, weights_checkpoint_file),
                                         monitor='val_categorical_accuracy',
                                         verbose=1,
                                         save_best_only=True,
                                         mode='max')
            callbacks_list = [checkpoint]
        else:
            gen_test = None
            validation_steps = None
            callbacks_list = None

        steps_per_epoch = number_train_samples // project['batch_size']
        for j in range(project['rounds']):
            # A fresh optimizer is created and the model recompiled at the
            # start of every round.
            optimizer = Adam(lr=lr, decay=decay)
            model.compile(optimizer=optimizer,
                          loss='categorical_crossentropy',
                          metrics=['categorical_accuracy'])
            model.fit_generator(gen_train,
                                steps_per_epoch=steps_per_epoch,
                                epochs=project['cycle'] * (j + 1),
                                verbose=1,
                                validation_data=gen_test,
                                validation_steps=validation_steps,
                                initial_epoch=j * project['cycle'],
                                callbacks=callbacks_list)

        model.save_weights(weights_file)
        last_weights.append(weights_file)

        # Pick the checkpoint of this fold with the highest validation
        # accuracy. The accuracy is the last '-'-separated field of the file
        # name; it is parsed as a float so 1.0000 ranks above 0.9999.
        best_name = None
        best_val = -1
        for name in os.listdir(weights_path):
            if (weights_base in name and weight_label in name
                    and 'improvement' in name):
                val = float(os.path.splitext(name)[0].rsplit('-', 1)[1])
                if val > best_val:
                    best_val = val
                    best_name = name
        if best_name is None:
            best_file = weights_file
        else:
            best_file = os.path.join(weights_path, best_name)

        if project['plot']:
            print('Plotting confusion matrix')
            if best_name is None:
                print('Loading last weights:', best_file)
            else:
                print('Loading best weights:', best_file)
            model.load_weights(best_file)

            best_predictions = []
            true_labels = []
            print('Predicting test files')
            # Without validation folds the plot is drawn from training files.
            if validate:
                use_files = pre_model_files_test.files
            else:
                use_files = pre_model_files_train.files
            for array_name in tqdm(use_files):
                img_path = os.path.join(use_path, array_name)
                img = np.load(img_path)
                if final:
                    img = np.squeeze(preprocess_input(img[np.newaxis].astype(np.float32)))
                prediction = model.predict(img[np.newaxis])
                best_predictions.append(project['categories'][np.argmax(prediction)])
                true_label = np.load(img_path.replace('-img-','-label-'))
                true_labels.append(project['categories'][np.argmax(true_label)])

            cm = confusion_matrix(true_labels, best_predictions, labels=project['categories'])
            plt.clf()
            sns.heatmap(pd.DataFrame(cm, project['categories'], project['categories']), annot=True, fmt='g')
            # confusion_matrix rows are true labels and columns predictions,
            # so the heatmap's y-axis is "Actual" and x-axis "Predicted".
            plt.ylabel('Actual')
            plt.xlabel('Predicted')
            plt.xticks(rotation=45, fontsize=8)
            plt.yticks(rotation=45, fontsize=8)
            plt.title('Confusion matrix for fold: ' + str(fold) + '\nweights' + weights_name)
            plt.savefig(os.path.join(plot_path, plot_name))
            print('Confusion matrix plot saved:', colored(os.path.join(plot_path, plot_name), 'magenta'))

        best_weights.append(best_file)

    project['number_categories'] = len(project['categories'])
    project['best_weights'] = best_weights
    project['last_weights'] = last_weights
    project['is_final'] = final
    return project