import numpy as np

import train


def test_preprocess():
    # One dummy 28x28 grayscale image with label 2.
    x = np.array([np.random.rand(28, 28)])
    y = np.array([2])
    x_train_pp, y_train_pp, x_test_pp, y_test_pp = train.preprocess(x, y, x, y)
    # Labels should be one-hot encoded over 10 classes ...
    assert y_train_pp.shape == (1, 10)
    assert y_test_pp.shape == (1, 10)
    # ... and images reshaped to NHWC with a single channel.
    assert x_train_pp.shape == (1, 28, 28, 1)
    assert x_test_pp.shape == (1, 28, 28, 1)
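For reference, a minimal sketch of a preprocess consistent with the shapes this test asserts: reshape grayscale images to NHWC with a singleton channel and one-hot encode labels over 10 classes. This is an assumption for illustration, not the project's actual train.preprocess.

import numpy as np

def preprocess(x_train, y_train, x_test, y_test, num_classes=10):
    # (N, 28, 28) -> (N, 28, 28, 1), scaled to [0, 1].
    x_train = x_train.reshape(x_train.shape + (1,)).astype('float32') / 255.0
    x_test = x_test.reshape(x_test.shape + (1,)).astype('float32') / 255.0
    # Integer labels (N,) -> one-hot (N, 10).
    y_train = np.eye(num_classes)[y_train]
    y_test = np.eye(num_classes)[y_test]
    return x_train, y_train, x_test, y_test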
def run_test():
    BS = 128
    print('Loading and preprocessing test data...')
    mean, std = Learner.load_meanstd()
    imgs_test = load_test_data()
    imgs_test = preprocess(imgs_test)
    imgs_test = imgs_test.astype('float32')
    # Normalize with the training-set mean and std.
    imgs_test -= mean
    imgs_test /= std

    print('Loading saved weights...')
    model = get_unet(Adam(0.001))
    print('Loading weights from %s' % Learner.best_weight_path)
    model.load_weights(Learner.best_weight_path)

    print('Augment')
    alen, dlen = len(transforms), len(imgs_test)
    test_x = np.ndarray((alen, dlen, 1, img_rows, img_cols), dtype=np.float32)
    for i in range(dlen):
        for j, transform in enumerate(transforms):
            test_x[j, i] = transform['do'](imgs_test[i].copy())

    print('Predicting masks on test data...')
    # First prediction is on the un-augmented images, then one per transform.
    outputs = []
    asis_res = model.predict(imgs_test, batch_size=BS, verbose=1)
    outputs.append(asis_res)
    for j, transform in enumerate(transforms):
        t_y = model.predict(test_x[j], batch_size=BS, verbose=1)
        outputs.append(t_y)

    print('Analyzing')
    # Undo each augmentation on its predicted mask, then average masks and
    # mask-existence probabilities over all augmentations.
    test_masks = np.ndarray((dlen, 1, img_rows, img_cols), dtype=np.float32)
    test_probs = np.ndarray((dlen,), dtype=np.float32)
    for i in range(dlen):
        masks = np.ndarray((alen + 1, 1, img_rows, img_cols), dtype=np.float32)
        probs = np.ndarray((alen + 1,), dtype=np.float32)
        for j, t_y in enumerate(outputs):
            mask, prob = t_y[0][i], t_y[1][i]
            if j:
                mask = transforms[j - 1]['undo'](mask)
            masks[j] = mask
            probs[j] = prob
        test_masks[i] = np.mean(masks, 0)
        test_probs[i] = np.mean(probs)

    print('Saving')
    np.save(Learner.test_mask_res, test_masks)
    np.save(Learner.test_mask_exist_res, test_probs)
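The transforms list driving the test-time augmentation above is defined elsewhere; a plausible minimal structure, assuming flip augmentations whose inverses are themselves, would be:

# Hypothetical 'transforms': each entry pairs a 'do' applied to the input image
# with the 'undo' that maps the predicted mask back to the original orientation.
# Flips are their own inverses. Assumed structure, not the project's actual list.
transforms = [
    {'do': lambda img: img[..., ::-1],       # horizontal flip (reverse columns)
     'undo': lambda mask: mask[..., ::-1]},
    {'do': lambda img: img[..., ::-1, :],    # vertical flip (reverse rows)
     'undo': lambda mask: mask[..., ::-1, :]},
]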
def generate_submission():
    # Load test images and preprocess for conv net.
    print('Loading and processing test images')
    imgs_test = DataManager.load_test_data()
    total = imgs_test.shape[0]
    imgs = np.ndarray(
        (total, 1, DataManager.IMG_TARGET_ROWS, DataManager.IMG_TARGET_ROWS),
        dtype=np.uint8)
    for i, img in enumerate(imgs_test):
        imgs[i] = preprocess(img)

    print('Loading network')
    model = build_model()
    model.load_weights('./results/net.hdf5')

    print('Generating predictions')
    masks, has_masks = model.predict(imgs, verbose=1)

    ids = []
    rles = []
    for i in range(total):
        # Zero out masks when there is no-nerve prediction.
        if has_masks[i, 0] < 0.5:
            masks[i, 0] *= 0.
        mask = post_process_mask(masks[i, 0])
        rle = run_length_enc(mask)
        rles.append(rle)
        ids.append(i + 1)
        if i % 100 == 0:
            print('{}/{}'.format(i, total))

    first_row = 'img,pixels'
    file_name = 'results/submission_{}.csv'.format(str(datetime.now()))
    with open(file_name, 'w+') as f:
        f.write(first_row + '\n')
        for i in range(total):
            f.write(str(ids[i]) + ',' + rles[i] + '\n')
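run_length_enc is defined elsewhere in the project; a minimal sketch of the usual Kaggle-style run-length encoding (1-indexed pixel positions, column-major order) that would fit this call site, assuming the post-processed mask is binary:

import numpy as np

def run_length_enc(mask):
    # Flatten column-major (Fortran order), as Kaggle's RLE format expects.
    pixels = mask.flatten(order='F')
    # Pad with zeros so runs touching the borders are closed.
    padded = np.concatenate([[0], pixels, [0]])
    # Value changes mark run starts and ends (1-indexed).
    runs = np.where(padded[1:] != padded[:-1])[0] + 1
    runs[1::2] -= runs[::2]  # turn end positions into run lengths
    return ' '.join(str(x) for x in runs)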
def run_test():
    BS = 256
    print('Loading and preprocessing test data...')
    mean, std = Learner.load_meanstd()
    imgs_test = load_test_data()
    imgs_test = preprocess(imgs_test)
    imgs_test = imgs_test.astype('float32')
    imgs_test -= mean
    imgs_test /= std

    print('Augment')
    alen, dlen = len(transforms), len(imgs_test)
    test_x = np.ndarray((alen, dlen, 1, img_rows, img_cols), dtype=np.float32)
    for i in range(dlen):
        for j, transform in enumerate(transforms):
            test_x[j, i] = transform['do'](imgs_test[i].copy())

    # Predict with each of the k fold models, test-time augmentation included.
    kfold = 6
    kfold_masks, kfold_prob = [], []
    for _iter in range(kfold):
        print('Iter=%d, Loading saved weights...' % _iter)
        model = get_unet(Adam(0.001))
        filepath = Learner.best_weight_path + '_%d.fold' % _iter
        print('Loading weights from %s' % filepath)
        model.load_weights(filepath)

        print('Predicting masks on test data...')
        outputs = []
        asis_res = model.predict(imgs_test, batch_size=BS, verbose=1)
        outputs.append(asis_res)
        for j, transform in enumerate(transforms):
            t_y = model.predict(test_x[j], batch_size=BS, verbose=1)
            outputs.append(t_y)

        print('Analyzing')
        test_masks = np.ndarray((dlen, 1, img_rows, img_cols), dtype=np.float32)
        test_probs = np.ndarray((dlen,), dtype=np.float32)
        for i in range(dlen):
            masks = np.ndarray((alen + 1, 1, img_rows, img_cols), dtype=np.float32)
            probs = np.ndarray((alen + 1,), dtype=np.float32)
            for j, t_y in enumerate(outputs):
                mask, prob = t_y[0][i], t_y[1][i]
                if j:
                    mask = transforms[j - 1]['undo'](mask.copy())
                masks[j] = mask
                probs[j] = prob
            test_masks[i] = np.mean(masks, 0)
            test_probs[i] = np.mean(probs)
        kfold_masks.append(test_masks)
        kfold_prob.append(test_probs)

    print('Summing results of ensemble')
    # Average the per-fold masks and probabilities into the final ensemble.
    res_masks = np.ndarray((dlen, 1, img_rows, img_cols), dtype=np.float32)
    res_probs = np.ndarray((dlen,), dtype=np.float32)
    for i in range(dlen):
        masks = np.ndarray((kfold, 1, img_rows, img_cols), dtype=np.float32)
        probs = np.ndarray((kfold,), dtype=np.float32)
        for k in range(kfold):
            masks[k] = kfold_masks[k][i]
            probs[k] = kfold_prob[k][i]
        res_masks[i] = np.mean(masks, 0)
        res_probs[i] = np.mean(probs)

    print('Saving')
    np.save(Learner.test_mask_res, res_masks)
    np.save(Learner.test_mask_exist_res, res_probs)
""" print timeit.timeit(code, number=100000) ''' #def training_testing(inputlines,type): # type 1 is splitting the data into training and testing. fopen = open("./train.csv","r") datalines = fopen.readlines() #linenumbers = range(1,len(datalines)) # skipping the first line datalines = datalines[1:] #print linenumbers shuffle(datalines) percentage = int(0.8*len(datalines)) training_lines = datalines[:percentage] print "training lines: ",len(training_lines) [worddict,gtruth,idf_dict ] = train.preprocess(training_lines) train.multinomial_training(worddict,gtruth,idf_dict,training_lines,"test_model_param.p") # need to parse the testing_lines to remove the ground truth and repack as list of string. gold_data = [int(line[0]) for line in datalines[percentage+1:]] testing_lines =[line[2:] for line in datalines[percentage+1:] ] print "testing lines: ",len(testing_lines) test.multinomial_testing("test_model_param.p",testing_lines,"test_results.txt") lista = open("test_results.txt","r").readlines() listb = [int(element.strip().split(",")[1]) for element in lista[1:]] comparision = map(operator.sub, gold_data, listb) print "If score is 0 its perfect return else score is error value"
import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics

from predict import load_test_data
from train import preprocess

path = '/Users/xuchenyang/Documents/third_exp/file/segnet-lr-3-32-100/'
predicted_masks = np.load(path + 'predict.npy')
imgs_test, imgs_test_mask = load_test_data()
imgs_test_gt = preprocess(imgs_test_mask)

predicted_masks_flat = predicted_masks.flatten()
test_gt_masks_flat = imgs_test_gt.flatten()

# Ground-truth masks are 0/255, so 255 marks the positive class.
fpr, tpr, thresholds = metrics.roc_curve(test_gt_masks_flat, predicted_masks_flat,
                                         pos_label=255)

plt.plot([0, 1], [0, 1], 'k--')
line1, = plt.plot(fpr, tpr, 'b', label="U-NET ROC (AUC = 0.86)")
plt.legend(handles=[line1], loc=4, prop={'size': 12})
plt.xlim(0, 1.0)
plt.ylim(0, 1.0)
plt.xlabel("False Positive Rate (1-Specificity)")
plt.ylabel("True Positive Rate (Sensitivity)")
plt.grid()
plt.savefig(path + 'roc')
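The AUC in the legend above is hard-coded; if the actual value is wanted, it can be derived from the same curve with the metrics module already in scope, e.g.:

# Sketch: compute the AUC from the curve instead of hard-coding it in the label.
auc = metrics.auc(fpr, tpr)
line1, = plt.plot(fpr, tpr, 'b', label="U-NET ROC (AUC = %.2f)" % auc)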
import pandas as pd
from keras.models import load_model

from train import preprocess

dataframe = pd.read_csv("data/test.csv")
p_ids = dataframe["PassengerId"]
data = preprocess(dataframe)

model = load_model('titanic.h5')
prediction = model.predict(data)

with open('result.csv', 'w') as file:
    file.write("PassengerId,Survived\n")
    for p_id, pred in zip(p_ids, prediction):
        # Round the sigmoid output to a hard 0/1 label.
        rounded_pred = int(round(pred[0]))
        file.write(str(p_id) + "," + str(rounded_pred) + "\n")
run_id = version.run_id
model = mlflow.sklearn.load_model(f"models:/{model_name}/{model_stage}")

# Load data
data_path = Path(__file__).parents[0].resolve() / "data"
train_df = pd.read_csv(data_path / "train.csv")
test_df = pd.read_csv(data_path / "test.csv")

# Preprocessing: impute missing values with training-set statistics.
impute_strats = {
    "Age": round(train_df["Age"].mean()),
    "Embarked": train_df["Embarked"].mode()[0],
    "Cabin": "Unknown",
    "Fare": round(train_df["Fare"].mean()),
}
test_df = preprocess(test_df, feature_labels, impute_strats)
test_df["Survived"] = model.predict(test_df[feature_labels])
test_df[["PassengerId", "Survived"]].to_csv(data_path / "submission.csv",
                                            index=False)

# Submit to Kaggle and pull the public leaderboard score back into MLflow.
kaggle.api.competition_submit(
    file_name=str(data_path / "submission.csv"),
    message="Testing submission api",
    competition="titanic",
)
time.sleep(30)  # give Kaggle time to score the submission
test_accuracy = kaggle.api.process_response(
    kaggle.api.competitions_submissions_list_with_http_info(
        "titanic"))[0]["publicScore"]
mlflow_client.log_metric(run_id, "test_accuracy", float(test_accuracy))
def test_preprocess(self):
    x_train, y_train, x_dev, y_dev = preprocess()
    # Report the size of every split, not just two of them.
    print("x_train, y_train, x_dev, y_dev: {0},{1},{2},{3}".format(
        len(x_train), len(y_train), len(x_dev), len(y_dev)))
def run_test():
    BS = 128
    print('Loading and preprocessing test data...')
    mean, std = Learner.load_meanstd()
    imgs_test, img_test_mask_gt = load_test_data()
    test_img_id = load_test_ids()
    imgs_test = preprocess(imgs_test)
    img_test_mask_gt = preprocess(img_test_mask_gt)
    imgs_test = imgs_test.astype('float32')
    imgs_test -= mean
    imgs_test /= std
    img_test_mask_gt = img_test_mask_gt.astype('float32')
    img_test_mask_gt /= 255.0  # scale ground-truth masks to [0, 1]

    print('Loading saved weights...')
    model = get_unet(Adam(0.001))
    print('Loading weights from %s' % Learner.best_weight_path)
    model.load_weights(Learner.best_weight_path)

    print('Augment')
    alen, dlen = len(transforms), len(imgs_test)
    test_x = np.ndarray((alen, dlen, 1, img_rows, img_cols), dtype=np.float32)
    for i in range(dlen):
        for j, transform in enumerate(transforms):
            test_x[j, i] = transform['do'](imgs_test[i].copy())

    print('Predicting masks on test data...')
    outputs = []
    asis_res = model.predict(imgs_test, batch_size=BS, verbose=1)
    outputs.append(asis_res)
    for j, transform in enumerate(transforms):
        t_y = model.predict(test_x[j], batch_size=BS, verbose=1)
        outputs.append(t_y)

    print('Analyzing')
    test_masks = np.ndarray((dlen, 1, img_rows, img_cols), dtype=np.float32)
    test_probs = np.ndarray((dlen,), dtype=np.float32)
    for i in range(dlen):
        masks = np.ndarray((alen + 1, 1, img_rows, img_cols), dtype=np.float32)
        probs = np.ndarray((alen + 1,), dtype=np.float32)
        for j, t_y in enumerate(outputs):
            mask, prob = t_y[0][i], t_y[1][i]
            if j:
                mask = transforms[j - 1]['undo'](mask)
            masks[j] = mask
            probs[j] = prob
        test_masks[i] = np.mean(masks, 0)
        test_probs[i] = np.mean(probs)

    print(img_test_mask_gt.shape)
    print(test_masks.shape)

    print('Saving')
    np.save(Learner.test_mask_res, test_masks)
    np.save(Learner.test_mask_exist_res, test_probs)
    np.save(Learner.test_mask_gt, img_test_mask_gt)
# Training labels, one-hot over digits 0-9.
train_label_set = [
    [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],  # 0
    [0, 0, 0, 0, 0, 0, 0, 0, 1, 0],  # 8
    [0, 0, 1, 0, 0, 0, 0, 0, 0, 0],  # 2
    [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],  # 4
]

# test data
test_digits_path = crop_image(
    os.path.join(os.path.curdir, 'captcha', 'test_set', 'captcha_test.jpg'),
    'test')
test_feature_set = []
for digit_path in test_digits_path:
    test_feature_set.append(feature_extract(digit_path))
test_label_set = [
    [0, 0, 0, 0, 0, 0, 0, 1, 0, 0],  # 7
    [0, 0, 0, 1, 0, 0, 0, 0, 0, 0],  # 3
    [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],  # 0
    [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],  # 4
]

# train & predict
scaler = preprocess(train_feature_set)
# transform() returns a new array rather than scaling in place, so the results
# must be assigned back (previously they were discarded).
train_feature_set = scaler.transform(train_feature_set)
test_feature_set = scaler.transform(test_feature_set)
clf = train(train_feature_set, train_label_set)
prediction = clf.predict(test_feature_set)
print(prediction)
print(np.argmax(prediction, axis=1))
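The preprocess used here evidently fits and returns a scaler rather than transformed data; a minimal sketch consistent with that usage, assuming scikit-learn's StandardScaler:

from sklearn.preprocessing import StandardScaler

def preprocess(feature_set):
    # Fit a scaler on the training features and return it so the caller
    # can apply the same scaling to both train and test sets.
    scaler = StandardScaler()
    return scaler.fit(feature_set)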
print("\nEvaluation:") dev_step(x_dev, y_dev, writer=dev_summary_writer) print("") path = saver.save(sess, checkpoint_prefix, global_step=current_step) print("Saved model checkpoint to {}\n".format(path)) def get_args(): parser = argparse.ArgumentParser() parser.add_argument("--prune", dest="prune", action="store_true") parser.add_argument("--make_graph", dest="makegraph", action="store_true") args = parser.parse_args() return args if __name__ == '__main__': args = get_args() if args.prune: prune_filter_weight("./runs/1557076524/checkpoints/model-21300.meta", './runs/1557076524/checkpoints/', 128) #filters = prune_filter_weight("./pruned_by_l2_norm/1557071581/checkpoints/model-2600.meta", './pruned_by_l2_norm/1557071581/checkpoints',90) elif args.makegraph: x_train, y_train, vocab_processor, x_dev, y_dev, x_test, y_test = train.preprocess( ) make_graph(x_train, y_train, vocab_processor, x_dev, y_dev)
def predict():
    model = get_unet()
    path_to_save_results = path + "UNET_PREDICTIONS/"

    imgs_test, imgs_test_mask = load_test_data()
    mean = np.mean(imgs_test)
    std = np.std(imgs_test)
    imgs_test = preprocess(imgs_test)
    imgs_test_mask = preprocess(imgs_test_mask)

    imgs_test_source = imgs_test.astype('float32')
    imgs_test_source -= mean
    imgs_test_source /= std

    imgs_test_mask = imgs_test_mask.astype('float32')
    imgs_test_mask /= 255.  # scale masks to [0, 1]

    print('Loading saved weights...')
    print('-' * 30)
    model.load_weights(path + 'unet.hdf5')

    print('Predicting masks on test data...')
    print('-' * 30)
    imgs_mask_predict = model.predict(imgs_test_source, verbose=1)
    res = model.evaluate(imgs_test_source, imgs_test_mask, batch_size=32, verbose=1)
    res_loss = np.array(res)
    np.save(path + 'predict.npy', imgs_mask_predict)
    np.savetxt(path + 'res_loss.txt', res_loss)

    # Save source image, ground-truth mask, and predicted mask as PNGs.
    predicted_masks = np.load(path + 'predict.npy')
    predicted_masks *= 255
    imgs_test, imgs_test_mask = load_test_data()
    for i in range(imgs_test.shape[0]):
        img = resize(imgs_test[i], (96, 96), preserve_range=True)
        img_mask = resize(imgs_test_mask[i], (96, 96), preserve_range=True)
        im_test_source = Image.fromarray(img.astype(np.uint8))
        im_test_masks = Image.fromarray((img_mask.squeeze()).astype(np.uint8))
        im_test_predict = Image.fromarray(
            (predicted_masks[i].squeeze()).astype(np.uint8))
        im_test_source_name = "Test_Image_" + str(i + 1) + ".png"
        im_test_predict_name = "Test_Image_" + str(i + 1) + "_Predict.png"
        im_test_gt_mask_name = "Test_Image_" + str(i + 1) + "_OriginalMask.png"
        im_test_source.save(
            os.path.join(path_to_save_results, im_test_source_name))
        im_test_predict.save(
            os.path.join(path_to_save_results, im_test_predict_name))
        im_test_masks.save(
            os.path.join(path_to_save_results, im_test_gt_mask_name))

    message = "Successfully Saved Results to " + path_to_save_results
    print(message)
import argparse
import pickle

import numpy as np
import pandas as pd

from train import preprocess

outfile = 'model.sav'

# Parse script arguments
parser = argparse.ArgumentParser(description='Process input')
parser.add_argument('tsv_path', type=str, help='tsv file path')
args = parser.parse_args()

# Read the input TSV
data = pd.read_csv(args.tsv_path, sep="\t")
ids = data['id'].copy()
X, y_true = preprocess(data, is_train=False)

# The model was trained on log revenue, so exponentiate its predictions.
model = pickle.load(open(outfile, 'rb'))
log_pred = model.predict(X)
y_pred = np.exp(log_pred)

# Export prediction results
prediction_df = pd.DataFrame(columns=['id', 'revenue'])
prediction_df['id'] = ids
prediction_df['revenue'] = y_pred
prediction_df.to_csv("prediction.csv", index=False, header=False)
import numpy as np

from train import get_unet, preprocess
from data import load_test_data, load_train_data

imgs_train, imgs_mask_train = load_train_data()
imgs_train = preprocess(imgs_train)
imgs_train = imgs_train.astype('float32')
mean = np.mean(imgs_train)
std = np.std(imgs_train)

imgs_test, imgs_id_test = load_test_data()
imgs_test = preprocess(imgs_test)
imgs_test = imgs_test.astype('float32')
imgs_test -= mean
imgs_test /= std

model = get_unet()
model.load_weights("final.h5")
imgs_mask_test = model.predict(imgs_test, verbose=1)
np.save('imgs_mask_test_final.npy', imgs_mask_test)
""" print timeit.timeit(code, number=100000) ''' #def training_testing(inputlines,type): # type 1 is splitting the data into training and testing. fopen = open("./train.csv", "r") datalines = fopen.readlines() #linenumbers = range(1,len(datalines)) # skipping the first line datalines = datalines[1:] #print linenumbers shuffle(datalines) percentage = int(0.8 * len(datalines)) training_lines = datalines[:percentage] print "training lines: ", len(training_lines) [worddict, gtruth, idf_dict] = train.preprocess(training_lines) train.multinomial_training(worddict, gtruth, idf_dict, training_lines, "test_model_param.p") # need to parse the testing_lines to remove the ground truth and repack as list of string. gold_data = [int(line[0]) for line in datalines[percentage + 1:]] testing_lines = [line[2:] for line in datalines[percentage + 1:]] print "testing lines: ", len(testing_lines) test.multinomial_testing("test_model_param.p", testing_lines, "test_results.txt") lista = open("test_results.txt", "r").readlines() listb = [int(element.strip().split(",")[1]) for element in lista[1:]] comparision = map(operator.sub, gold_data, listb) print "If score is 0 its perfect return else score is error value"