def test_experiment_boston(self):
    """Isolation-forest experiment on the Boston dataset: expect 13 features + 1 anomaly score."""
    papermill.execute_notebook(
        "Experiment.ipynb",
        "/dev/null",
        parameters=dict(
            dataset="/tmp/data/boston.csv",
            filter_type="remover",
            model_features="medv",
            max_samples="auto",
            contamination=0.1,
            max_features=1.0,
        ),
    )
    papermill.execute_notebook(
        "Deployment.ipynb",
        "/dev/null",
    )
    payload = datasets.boston_testdata()
    with server.Server() as srv:
        response = srv.test(data=payload)
        column_names = response["names"]
        rows = response["ndarray"]
        # 13 original features plus 1 anomaly-score column
        self.assertEqual(len(rows[0]), 14)
        self.assertEqual(len(column_names), 14)
def test_experiment_report_contexts(self):
    """Runs the DPR reports-contexts experiment and checks the deployed response width.

    Fixes: removed leftover debug ``print`` statements and corrected the
    trailing comment, which claimed "1 feature" while 4 columns are asserted.
    """
    papermill.execute_notebook(
        "Experiment.ipynb",
        "/dev/null",
        parameters=dict(
            dataset="/tmp/data/reports_contexts_small.csv",
            column="context",
            question="What is the best weed herbicide?",
            top=5,
            inner_batch_size=5,
            tokenizer_fn="facebook/dpr-reader-single-nq-base",
            tokenizer_max_len=512,
            dpr_fn="facebook/dpr-reader-single-nq-base",
        ),
    )
    papermill.execute_notebook(
        "Deployment.ipynb",
        "/dev/null",
    )
    data = datasets.report_contexts_test_data()
    with server.Server() as s:
        response = s.test(data=data, timeout=10)
        ndarray = response["ndarray"]
        self.assertEqual(len(ndarray[0]), 4)  # 4 output columns per row
def test_experiment_boston(self):
    """Imputer experiment on the Boston dataset: output keeps all 13 features."""
    papermill.execute_notebook(
        "Experiment.ipynb",
        "/dev/null",
        parameters=dict(
            dataset="/tmp/data/boston.csv",
            target="medv",
            strategy_num="mean",
            strategy_cat="most_frequent",
            fillvalue_num=0,
            fillvalue_cat="",
        ),
    )
    papermill.execute_notebook(
        "Deployment.ipynb",
        "/dev/null",
    )
    payload = datasets.boston_testdata()
    with server.Server() as srv:
        response = srv.test(data=payload)
        column_names = response["names"]
        rows = response["ndarray"]
        # imputation preserves the 13 input features
        self.assertEqual(len(rows[0]), 13)
        self.assertEqual(len(column_names), 13)
def test_experiment_boston(self):
    """K-means experiment on Boston: 13 features + cluster label + 3 cluster distances."""
    papermill.execute_notebook(
        "Experiment.ipynb",
        "/dev/null",
        parameters=dict(
            dataset="/tmp/data/boston.csv",
            filter_type="remover",
            model_features="medv",
            n_clusters=3,
            n_init=10,
            max_iter=300,
            algorithm="auto",
        ),
    )
    papermill.execute_notebook(
        "Deployment.ipynb",
        "/dev/null",
    )
    payload = datasets.boston_testdata()
    with server.Server() as srv:
        response = srv.test(data=payload)
        column_names = response["names"]
        rows = response["ndarray"]
        # 13 features + 1 assigned cluster + 3 distances to cluster centers
        self.assertEqual(len(rows[0]), 17)
        self.assertEqual(len(column_names), 17)
def test_experiment_face_detection_cuda(self):
    """Face-detection experiment on CUDA: response carries 5 output features."""
    papermill.execute_notebook(
        "Experiment.ipynb",
        "/dev/null",
        parameters=dict(
            dataset="/tmp/data/football_teams.zip",
            device="cuda",
        ),
    )
    papermill.execute_notebook(
        "Deployment.ipynb",
        "/dev/null",
    )
    payload = datasets.image_testdata(kind='people', ext='jpg')
    with server.Server() as srv:
        response = srv.test(data=payload, timeout=10)
        if 'tensor' in response.keys():
            # tensor payload: second dimension is the feature count
            tensor_shape = response["tensor"]['shape']
            self.assertEqual(tensor_shape[1], 5)
        else:
            # ndarray payload: check row width and column names
            rows = response["ndarray"]
            self.assertEqual(len(rows[0]), 5)
            column_names = response["names"]
            self.assertEqual(len(column_names), 5)
def test_experiment_titanic(self):
    """Random-forest classifier on Titanic: 11 features + class + 2 probabilities."""
    papermill.execute_notebook(
        "Experiment.ipynb",
        "/dev/null",
        parameters=dict(
            dataset="/tmp/data/titanic.csv",
            target="Survived",
            filter_type="remover",
            model_features="",
            one_hot_features="",
            n_estimators=10,
            criterion="gini",
            max_depth=None,
            max_features="auto",
            class_weight=None,
            method="predict_proba",
        ),
    )
    papermill.execute_notebook(
        "Deployment.ipynb",
        "/dev/null",
    )
    payload = datasets.titanic_testdata()
    with server.Server() as srv:
        response = srv.test(data=payload)
        column_names = response["names"]
        rows = response["ndarray"]
        # 11 input features + 1 predicted class + 2 class probabilities
        self.assertEqual(len(rows[0]), 14)
        self.assertEqual(len(column_names), 14)
def test_experiment_titanic(self):
    """High-cardinality encoding (kmeans) on Titanic: feature count is preserved."""
    papermill.execute_notebook(
        "Experiment.ipynb",
        "/dev/null",
        parameters=dict(
            dataset="/tmp/data/titanic.csv",
            target="Survived",
            high_cardinality_features="Pclass",
            method="kmeans",
            threshold=0.1,
            n=10,
        ),
    )
    papermill.execute_notebook(
        "Deployment.ipynb",
        "/dev/null",
    )
    payload = datasets.titanic_testdata()
    with server.Server() as srv:
        response = srv.test(data=payload)
        column_names = response["names"]
        rows = response["ndarray"]
        # encoding replaces columns in place, so 11 features remain
        self.assertEqual(len(rows[0]), 11)
        self.assertEqual(len(column_names), 11)
def test_experiment_boston(self):
    """SVR experiment on Boston: 13 features plus the regression prediction."""
    papermill.execute_notebook(
        "Experiment.ipynb",
        "/dev/null",
        parameters=dict(
            dataset="/tmp/data/boston.csv",
            target="medv",
            filter_type="remover",
            model_features="",
            one_hot_features="",
            kernel="rbf",
            degree=3,
            gamma="auto",
            C=1.0,
            max_iter=-1,
        ),
    )
    papermill.execute_notebook(
        "Deployment.ipynb",
        "/dev/null",
    )
    payload = datasets.boston_testdata()
    with server.Server() as srv:
        response = srv.test(data=payload)
        column_names = response["names"]
        rows = response["ndarray"]
        # 13 input features + 1 prediction column
        self.assertEqual(len(rows[0]), 14)
        self.assertEqual(len(column_names), 14)
def test_experiment_iris(self):
    """Feature-engineering experiment on Iris: 4 original + 8 generated features.

    Fixes: removed a leftover debug ``print(names)`` that cluttered test output.
    """
    papermill.execute_notebook(
        "Experiment.ipynb",
        "/dev/null",
        parameters=dict(
            dataset="/tmp/data/iris.csv",
            target="Species",
            date=None,
            group=["SepalLengthCm"],
            budget=20,
        ),
    )
    papermill.execute_notebook(
        "Deployment.ipynb",
        "/dev/null",
    )
    data = datasets.iris_testdata()
    with server.Server() as s:
        response = s.test(data=data)
        names = response["names"]
        ndarray = response["ndarray"]
        self.assertEqual(len(ndarray[0]), 12)  # 4 original features + 8 new features
        self.assertEqual(len(names), 12)
def test_experiment_boston(self):
    """Linear-regression experiment on Boston: 13 features plus the prediction."""
    papermill.execute_notebook(
        "Experiment.ipynb",
        "/dev/null",
        parameters=dict(
            dataset="/tmp/data/boston.csv",
            target="medv",
            filter_type="remover",
            model_features="",
            ordinal_features="",
            fit_intercept=True,
        ),
    )
    papermill.execute_notebook(
        "Deployment.ipynb",
        "/dev/null",
    )
    payload = datasets.boston_testdata()
    with server.Server() as srv:
        response = srv.test(data=payload)
        column_names = response["names"]
        rows = response["ndarray"]
        # 13 input features + 1 prediction column
        self.assertEqual(len(rows[0]), 14)
        self.assertEqual(len(column_names), 14)
def test_experiment_titanic(self):
    """AutoML classifier on Titanic: 11 features + class + 2 probabilities."""
    papermill.execute_notebook(
        "Experiment.ipynb",
        "/dev/null",
        parameters=dict(
            dataset="/tmp/data/titanic.csv",
            target="Survived",
            filter_type="remover",
            model_features="",
            one_hot_features="",
            time_left_for_this_task=30,
            per_run_time_limit=30,
            ensemble_size=5,
            method="predict_proba",
        ),
    )
    papermill.execute_notebook(
        "Deployment.ipynb",
        "/dev/null",
    )
    payload = datasets.titanic_testdata()
    with server.Server() as srv:
        response = srv.test(data=payload)
        column_names = response["names"]
        rows = response["ndarray"]
        # 11 input features + 1 predicted class + 2 class probabilities
        self.assertEqual(len(rows[0]), 14)
        self.assertEqual(len(column_names), 14)
def test_experiment_report_contexts_word2vec(self):
    """Word2vec retriever experiment on report contexts: response rows have 4 columns."""
    papermill.execute_notebook(
        "Experiment.ipynb",
        "/dev/null",
        parameters=dict(
            dataset="/tmp/data/reports_contexts.csv",
            column="context",
            question="Qual é o melhor herbicida para erva da ninha ?",
            retriever_type="word2vec",
            bm25_k1=2,
            bm25_b=0.75,
            top=10,
        ),
    )
    papermill.execute_notebook(
        "Deployment.ipynb",
        "/dev/null",
    )
    payload = datasets.report_contexts_test_data()
    with server.Server() as srv:
        response = srv.test(data=payload)
        rows = response["ndarray"]
        # each result row carries 4 output columns
        self.assertEqual(len(rows[0]), 4)
def test_experiment_titanic(self):
    """SVC experiment on Titanic: 11 features + class + 2 probabilities."""
    papermill.execute_notebook(
        "Experiment.ipynb",
        "/dev/null",
        parameters=dict(
            dataset="/tmp/data/titanic.csv",
            target="Survived",
            filter_type="remover",
            model_features="",
            one_hot_features="",
            C=1.0,
            kernel="rbf",
            degree=3,
            gamma="auto",
            probability=True,
            max_iter=-1,
            method="predict_proba",
        ),
    )
    papermill.execute_notebook(
        "Deployment.ipynb",
        "/dev/null",
    )
    payload = datasets.titanic_testdata()
    with server.Server() as srv:
        response = srv.test(data=payload)
        column_names = response["names"]
        rows = response["ndarray"]
        # 11 input features + 1 predicted class + 2 class probabilities
        self.assertEqual(len(rows[0]), 14)
        self.assertEqual(len(column_names), 14)
def test_hotel_bookings(self):
    """High-cardinality encoding (kmeans) on hotel bookings: 31 features preserved."""
    papermill.execute_notebook(
        "Experiment.ipynb",
        "/dev/null",
        parameters=dict(
            dataset="/tmp/data/hotel_bookings.csv",
            target="is_canceled",
            high_cardinality_features="hotel",
            method="kmeans",
            threshold=0.1,
            n=10,
        ),
    )
    papermill.execute_notebook(
        "Deployment.ipynb",
        "/dev/null",
    )
    payload = datasets.hotel_bookings_testdata()
    with server.Server() as srv:
        response = srv.test(data=payload)
        column_names = response["names"]
        rows = response["ndarray"]
        # encoding keeps the 31 input features
        self.assertEqual(len(rows[0]), 31)
        self.assertEqual(len(column_names), 31)
def test_yolo_empty_output(self):
    """YOLO with near-1.0 thresholds on a text image: schema still has 6 columns."""
    papermill.execute_notebook(
        "Experiment.ipynb",
        "/dev/null",
        parameters=dict(
            dataset=EXPERIMENT_DATASET,
            score_threshold=0.9999,
            iou_threshold=0.9999,
            yolo_weight_type="tiny",
        ),
    )
    papermill.execute_notebook(
        "Deployment.ipynb",
        "/dev/null",
    )
    payload = datasets.image_testdata(kind='text', ext='png')
    with server.Server() as srv:
        response = srv.test(data=payload, timeout=10)
        if 'tensor' in response.keys():
            # tensor payload: second dimension is the feature count
            tensor_shape = response["tensor"]['shape']
            self.assertEqual(tensor_shape[1], 6)
        else:
            # ndarray payload: check row width and column names
            rows = response["ndarray"]
            self.assertEqual(len(rows[0]), 6)
            column_names = response["names"]
            self.assertEqual(len(column_names), 6)
def test_yolo_tiny_portuguese(self):
    """Tiny YOLO with Portuguese labels, over png and jpg inputs: 6 output columns."""
    papermill.execute_notebook(
        "Experiment.ipynb",
        "/dev/null",
        parameters=dict(
            dataset=EXPERIMENT_DATASET,
            language="português",
            yolo_weight_type="tiny",
        ),
    )
    papermill.execute_notebook(
        "Deployment.ipynb",
        "/dev/null",
    )
    for ext in ['png', 'jpg']:
        payload = datasets.image_testdata(kind='objects', ext=ext)
        with server.Server() as srv:
            response = srv.test(data=payload, timeout=10)
            if 'tensor' in response.keys():
                # tensor payload: second dimension is the feature count
                tensor_shape = response["tensor"]['shape']
                self.assertEqual(tensor_shape[1], 6)
            else:
                # ndarray payload: check row width and column names
                rows = response["ndarray"]
                self.assertEqual(len(rows[0]), 6)
                column_names = response["names"]
                self.assertEqual(len(column_names), 6)
def test_experiment_titanic(self):
    """MLP classifier on Titanic: 11 features + class + 2 probabilities."""
    papermill.execute_notebook(
        "Experiment.ipynb",
        "/dev/null",
        parameters=dict(
            dataset="/tmp/data/titanic.csv",
            target="Survived",
            filter_type="remover",
            model_features="",
            one_hot_features="",
            hidden_layer_sizes=100,
            activation="relu",
            solver="adam",
            learning_rate="constant",
            max_iter=200,
            shuffle=True,
            method="predict_proba",
        ),
    )
    papermill.execute_notebook(
        "Deployment.ipynb",
        "/dev/null",
    )
    payload = datasets.titanic_testdata()
    with server.Server() as srv:
        response = srv.test(data=payload)
        column_names = response["names"]
        rows = response["ndarray"]
        # 11 input features + 1 predicted class + 2 class probabilities
        self.assertEqual(len(rows[0]), 14)
        self.assertEqual(len(column_names), 14)
def test_experiment_ocr_output_data(self):
    """OCR experiment: every returned bounding box must have xmax > xmin and ymax > ymin.

    Fixes: removed a leftover debug ``print(response)`` and marked the unused
    unpacked ``text`` element with an underscore prefix.
    """
    papermill.execute_notebook(
        "Experiment.ipynb",
        "/dev/null",
        parameters=dict(
            dataset="/tmp/data/ocr_dataset.zip",
            target="target",
            filter_type="incluir",
            model_features="input_image",
            bbox_conf=60,
            segmentation_mode="Considere um único bloco de texto uniforme",
            ocr_engine="Mecanismo de redes neurais com apenas LSTM",
            language="por",
            bbox_return="np_array",
            image_return_format="N/A",
        ),
    )
    papermill.execute_notebook(
        "Deployment.ipynb",
        "/dev/null",
    )
    for ext in ['png', 'jpg']:
        data = datasets.image_testdata(kind='text', ext=ext)
        with server.Server() as s:
            response = s.test(data=data, timeout=10)
            for bbox in response['ndarray']:
                # box layout: x-min, y-min, x-max, y-max, recognized text
                xmin, ymin, xmax, ymax, _text = bbox
                self.assertGreater(xmax, xmin, "BoundingBox incorreta.")
                self.assertGreater(ymax, ymin, "BoundingBox incorreta.")
def test_experiment_titanic(self):
    """Logistic-regression experiment on Titanic: 11 features + class + 2 probabilities."""
    papermill.execute_notebook(
        "Experiment.ipynb",
        "/dev/null",
        parameters=dict(
            dataset="/tmp/data/titanic.csv",
            target="Survived",
            filter_type="remover",
            model_features="",
            ordinal_features="",
            penalty="l2",
            C=1.0,
            fit_intercept=True,
            class_weight=None,
            solver="liblinear",
            max_iter=100,
            multi_class="auto",
            method="predict_proba",
        ),
    )
    papermill.execute_notebook(
        "Deployment.ipynb",
        "/dev/null",
    )
    payload = datasets.titanic_testdata()
    with server.Server() as srv:
        response = srv.test(data=payload)
        column_names = response["names"]
        rows = response["ndarray"]
        # 11 input features + 1 predicted class + 2 class probabilities
        self.assertEqual(len(rows[0]), 14)
        self.assertEqual(len(column_names), 14)
def test_experiment_hotel_bookings(self):
    """Feature-engineering experiment on hotel bookings: 31 original + 79 new features.

    Fixes: removed a leftover debug ``print(names)`` that cluttered test output.
    """
    papermill.execute_notebook(
        "Experiment.ipynb",
        "/dev/null",
        parameters=dict(
            dataset="/tmp/data/hotel_bookings.csv",
            target="is_canceled",
            date="reservation_status_date",
            group=["hotel"],
            budget=20,
        ),
    )
    papermill.execute_notebook(
        "Deployment.ipynb",
        "/dev/null",
    )
    data = datasets.hotel_bookings_testdata()
    with server.Server() as s:
        response = s.test(data=data)
        names = response["names"]
        ndarray = response["ndarray"]
        self.assertEqual(len(ndarray[0]), 110)  # 31 original features + 79 new features
        self.assertEqual(len(names), 110)
def test_experiment_imdb(self):
    """GloVe sentence-classification experiment on IMDB: 1 output column.

    Fixes: the original ``os.chdir`` leaked a working-directory change into
    every subsequent test; the directory is now restored in a ``finally``.
    """
    original_cwd = os.getcwd()
    os.chdir("tasks/nlp-glove-embeddings-sentence-classification")
    try:
        papermill.execute_notebook(
            "Experiment.ipynb",
            "/dev/null",
            parameters=dict(
                dataset="/tmp/data/imdb.csv",
                target="label",
                language="english",
                train_batch_size=10,
                eval_batch_size=2,
                max_epochs=200,
                accumulate_grad_batches=8,
                learning_rate=0.12,
                seed=7,
                hidden_dim=300,
                filter_type="incluir",
                model_features="text",
            ),
        )
        papermill.execute_notebook(
            "Deployment.ipynb",
            "/dev/null",
        )
        data = datasets.imdb_testdata()
        with server.Server() as s:
            response = s.test(data=data)
            names = response["names"]
            ndarray = response["ndarray"]
            self.assertEqual(len(ndarray[0]), 1)  # 1 output column
            self.assertEqual(len(names), 1)
    finally:
        # restore the working directory so other tests are unaffected
        os.chdir(original_cwd)
def test_experiment(self):
    """BERT question-answering experiment on small report contexts: 5 output columns."""
    papermill.execute_notebook(
        "Experiment.ipynb",
        "/dev/null",
        parameters=dict(
            dataset="/tmp/data/reports_contexts_small.csv",
            question="Qual é o melhor herbicida para erva da ninha ?",
            top=10,
            column_context="context",
            column_question="question",
            column_answer_start="answer_start",
            column_answer_end="answer_end",
            train_from_zero=False,
            train_from_squad=False,
            dev_size_from_data=0.2,
            test_size_from_dev=0.5,
            batch_dataset_preparation=30,
            model_name="neuralmind/bert-large-portuguese-cased",
            train_batch_size=2,
            eval_batch_size=2,
            max_length=384,
            doc_stride=128,
            learning_rate=3.0e-5,
            eps=1.0e-08,
            seed=13,
            num_gpus=1,
            profiler=True,
            max_epochs=2,
            accumulate_grad_batches=16,
            check_val_every_n_epoch=1,
            progress_bar_refresh_rate=1,
            gradient_clip_val=1.0,
            fast_dev_run=False,
            monitor='avg_train_loss',
            min_delta=0.01,
            patience=1,
            verbose=False,
            mode='min',
        ),
    )
    papermill.execute_notebook(
        "Deployment.ipynb",
        "/dev/null",
    )
    payload = datasets.document_reader_test_data()
    with server.Server() as srv:
        response = srv.test(data=payload)
        column_names = response["names"]
        rows = response["ndarray"]
        # each answer row carries 5 output columns
        self.assertEqual(len(rows[0]), 5)
        self.assertEqual(len(column_names), 5)
def test_experiment_face_detection_without_people(self):
    """Face detection on images without people, png and jpg: schema keeps 5 columns."""
    papermill.execute_notebook(
        "Experiment.ipynb",
        "/dev/null",
        parameters=dict(
            dataset="/tmp/data/football_teams.zip",
            image_size=64,
            margin=5,
            min_face_size=10,
            factor=0.709,
            keep_all=True,
            device="cpu",
            seed=7,
            inference_batch_size=2,
            input_square_transformation_size=128,
        ),
    )
    papermill.execute_notebook(
        "Deployment.ipynb",
        "/dev/null",
    )
    for ext in ['png', 'jpg']:
        payload = datasets.image_testdata(kind='objects', ext=ext)
        with server.Server() as srv:
            response = srv.test(data=payload, timeout=10)
            if 'tensor' in response.keys():
                # tensor payload: second dimension is the feature count
                tensor_shape = response["tensor"]['shape']
                self.assertEqual(tensor_shape[1], 5)
            else:
                # ndarray payload: check row width and column names
                rows = response["ndarray"]
                self.assertEqual(len(rows[0]), 5)
                column_names = response["names"]
                self.assertEqual(len(column_names), 5)
def test_hotel_bookings(self):
    """Time-series grouping experiment on hotel bookings.

    Fixes: the original test made a request but asserted nothing, so it could
    never fail on a bad response; at minimum the response must now be non-None.
    """
    papermill.execute_notebook(
        "Experiment.ipynb",
        "/dev/null",
        parameters=dict(
            dataset="/tmp/data/hotel_bookings.csv",
            group_col="hotel",
            period="mês",
            date_col="reservation_status_date",
            target_col="reservation_status",
        ),
    )
    papermill.execute_notebook(
        "Deployment.ipynb",
        "/dev/null",
    )
    data = datasets.hotel_bookings_testdata()
    with server.Server() as s:
        response = s.test(data=data)
        # NOTE(review): no shape assertions existed here; the expected output
        # schema is unknown from this file — add exact checks once confirmed.
        self.assertIsNotNone(response)
def test_experiment_titanic(self):
    """Normalizer experiment on Titanic: feature count is unchanged."""
    papermill.execute_notebook(
        "Experiment.ipynb",
        "/dev/null",
        parameters=dict(
            dataset="/tmp/data/titanic.csv",
            target="Survived",
            norm="l2",
        ),
    )
    papermill.execute_notebook(
        "Deployment.ipynb",
        "/dev/null",
    )
    payload = datasets.titanic_testdata()
    with server.Server() as srv:
        response = srv.test(data=payload)
        column_names = response["names"]
        rows = response["ndarray"]
        # normalization keeps all 11 features
        self.assertEqual(len(rows[0]), 11)
        self.assertEqual(len(column_names), 11)
def test_experiment_titanic(self):
    """Column-filter experiment on Titanic: dropping "Name" leaves 11 of 12 features."""
    papermill.execute_notebook(
        "Experiment.ipynb",
        "/dev/null",
        parameters=dict(
            dataset="/tmp/data/titanic.csv",
            features_to_filter=["Name"],
        ),
    )
    papermill.execute_notebook(
        "Deployment.ipynb",
        "/dev/null",
    )
    payload = datasets.titanic_testdata_full()
    with server.Server() as srv:
        response = srv.test(data=payload)
        column_names = response["names"]
        rows = response["ndarray"]
        # 12 input features minus the 1 filtered column
        self.assertEqual(len(rows[0]), 11)
        self.assertEqual(len(column_names), 11)
def test_experiment_iris_empty_return(self):
    """Row-filter experiment on Iris with a condition matching nothing: empty result."""
    papermill.execute_notebook(
        "Experiment.ipynb",
        "/dev/null",
        parameters=dict(
            dataset="/tmp/data/iris.csv",
            target="SepalLengthCm",
            condition="Menor que",
            value=5,
        ),
    )
    papermill.execute_notebook(
        "Deployment.ipynb",
        "/dev/null",
    )
    payload = datasets.iris_testdata_full()
    with server.Server() as srv:
        response = srv.test(data=payload)
        column_names = response["names"]
        rows = response["ndarray"]
        # no rows survive the filter, but the 5 column names are still returned
        self.assertEqual(len(rows), 0)
        self.assertEqual(len(column_names), 5)
def test_experiment_iris(self):
    """Normalizer experiment on Iris: all 4 features are preserved."""
    papermill.execute_notebook(
        "Experiment.ipynb",
        "/dev/null",
        parameters=dict(
            dataset="/tmp/data/iris.csv",
            target="Species",
            norm="l2",
        ),
    )
    papermill.execute_notebook(
        "Deployment.ipynb",
        "/dev/null",
    )
    payload = datasets.iris_testdata()
    with server.Server() as srv:
        response = srv.test(data=payload)
        column_names = response["names"]
        rows = response["ndarray"]
        # normalization keeps the 4 input features
        self.assertEqual(len(rows[0]), 4)
        self.assertEqual(len(column_names), 4)
def test_experiment_hotel_bookings(self):
    """Column-filter experiment on hotel bookings: 2 of 32 features removed."""
    papermill.execute_notebook(
        "Experiment.ipynb",
        "/dev/null",
        parameters=dict(
            dataset="/tmp/data/hotel_bookings.csv",
            features_to_filter=[
                "reservation_status_date",
                "arrival_date_year",
            ],
        ),
    )
    papermill.execute_notebook(
        "Deployment.ipynb",
        "/dev/null",
    )
    payload = datasets.hotel_bookings_testdata_full()
    with server.Server() as srv:
        response = srv.test(data=payload)
        column_names = response["names"]
        rows = response["ndarray"]
        # 32 input features minus the 2 filtered columns
        self.assertEqual(len(rows[0]), 30)
        self.assertEqual(len(column_names), 30)
def test_experiment_boston(self):
    """Feature-selection experiment on Boston: 1 of 13 features removed."""
    papermill.execute_notebook(
        "Experiment.ipynb",
        "/dev/null",
        parameters=dict(
            dataset="/tmp/data/boston.csv",
            target="medv",
            cutoff=0.9,
            threshold=0.0,
        ),
    )
    papermill.execute_notebook(
        "Deployment.ipynb",
        "/dev/null",
    )
    payload = datasets.boston_testdata()
    with server.Server() as srv:
        response = srv.test(data=payload)
        column_names = response["names"]
        rows = response["ndarray"]
        # 13 input features minus the 1 dropped column
        self.assertEqual(len(rows[0]), 12)
        self.assertEqual(len(column_names), 12)