class TestPredict(unittest.TestCase):
    """Tests for the question-answering Predict wrapper."""

    pred = Predict(
        model_name='bert-large-uncased-whole-word-masking-finetuned-squad')
    question = ['what do I love ?']
    answers = ['I love food', 'I drink water']

    def test__reconstruct_text(self):
        """_reconstruct_text should rebuild '[CLS] <question> <answer>', lowercased."""
        pairs = [[self.question[0], answer] for answer in self.answers]
        encoded = self.pred.tokenizer.batch_encode_plus(
            pairs,
            return_tensors='tf',
            max_length=128,
            truncation='only_second',
            padding=True)
        token_lists = [
            self.pred.tokenizer.convert_ids_to_tokens(ids)
            for ids in encoded['input_ids']
        ]
        for idx, tokens in enumerate(token_lists):
            reconstructed = self.pred._reconstruct_text(tokens)
            expected = "[CLS] " + " ".join(pairs[idx]).lower()
            self.assertEqual(expected, reconstructed)
            self.assertTrue(all(isinstance(ch, str) for ch in reconstructed))

    def test_predict_batch(self):
        """predict_batch returns dicts carrying the documented keys and types."""
        self.assertIsNone(self.pred.predict_batch('', []))
        predictions = self.pred.predict_batch(self.question[0], self.answers)
        expected_keys = ['confidence', 'full_context', 'start', 'end', 'answer']
        for prediction in predictions:
            self.assertTrue(
                all(key in list(prediction.keys()) for key in expected_keys))
            self.assertIsInstance(prediction['confidence'], float)
            self.assertIsInstance(prediction['full_context'], str)
            self.assertIsInstance(prediction['start'], int)
            self.assertIsInstance(prediction['end'], int)
            self.assertTrue(prediction['answer'] in prediction['full_context'])
def __init__(self, project_parameters):
    """Build the demo GUI: predictor, transform, window, widgets, and canvas."""
    self.project_parameters = project_parameters
    self.predict_object = Predict(project_parameters=project_parameters)
    self.transform = get_transform_from_file(
        filepath=project_parameters.transform_config_path)['predict']
    self.data_path = None

    # window: full-screen-sized root with a title
    self.window = Tk()
    screen_size = (self.window.winfo_screenwidth(),
                   self.window.winfo_screenheight())
    self.window.geometry('{}x{}'.format(*screen_size))
    self.window.title('Demo GUI')

    # buttons wired to the load / play / recognize callbacks
    self.load_audio_button = Button(self.window, text='load audio',
                                    fg='black', bg='white',
                                    command=self._load_audio)
    self.play_audio_button = Button(self.window, text='play audio',
                                    fg='black', bg='white',
                                    command=self._play_audio)
    self.recognize_button = Button(self.window, text='recognize',
                                   fg='black', bg='white',
                                   command=self._recognize)

    # labels for the chosen file, probabilities, and the big result text
    self.data_path_label = Label(self.window, text='', fg='black')
    self.probability_label = Label(self.window, text='', fg='black')
    self.result_label = Label(self.window, text='', fg='black',
                              font=(None, 50))

    # matplotlib canvas; facecolor matches the Tkinter default background
    facecolor = (0.9254760742, 0.9254760742, 0.9254760742)
    self.gallery_image_canvas = FigureCanvasTkAgg(
        Figure(figsize=(5, 5), facecolor=facecolor), master=self.window)
def test_column_added(self):
    """The prediction output must contain the Prediction and Target columns."""
    test_data = pd.read_csv(self.data_path, sep=";")
    result = Predict().execute(test_data, self.model_data_path)
    expected = {'Prediction', 'Target'}
    self.assertTrue(expected.issubset(result.columns))
def test_columns_removed(self):
    """Intermediate feature columns must be dropped from the prediction output."""
    test_data = pd.read_csv(self.data_path, sep=";")
    result = Predict().execute(test_data, self.model_data_path)
    dropped = {'FamilySize', 'Name', 'Age', 'Fare',
               'Title', 'Sex', 'Embarked', 'PassengerId'}
    self.assertFalse(dropped.issubset(result.columns))
def command_predict(self):
    """Run a price prediction, creating the Predict helper lazily.

    On the first call (no helper yet) only the Predict instance is
    created; subsequent calls ask it for a price from a mileage.

    Returns:
        False when prediction raises RuntimeError; otherwise None
        (preserving the original control flow).
    """
    try:
        if self.predict_class is None:
            # Plain attribute assignment instead of the original
            # self.__setattr__("predict_class", ...) indirection.
            self.predict_class = Predict()
        else:
            self.predict_class.price_from_mileage()
    except RuntimeError:
        return False
def predict():
    """Flask endpoint: run the survival model on posted JSON records.

    Expects a JSON array of passenger records. Responds with one
    {"Name", "Result"} object per row, an error payload on failure,
    or a plain message when no model is loaded.
    """
    if not saved_model:
        print('There is no model')
        return 'No model to use'
    try:
        json_ = request.json
        if json_ is None:
            return jsonify({"message": "text not found"})
        # JSON -> DataFrame, then predict with the saved model.
        data = pd.DataFrame(json_)
        prediction = Predict().execute(data, saved_model)
        # Concatenate input and prediction for a more detailed result.
        concat_data = pd.concat(
            [data, prediction.reindex(data.index)], axis=1)
        predictions_results = []
        for _, row in concat_data.iterrows():
            survived = row["Prediction"] == 1
            predictions_results.append({
                "Name": row["Name"],
                "Result": ("Would survive the crash" if survived
                           else "Would not survive the crash"),
            })
        return jsonify(predictions_results)
    except Exception:
        # BUG FIX: the original returned jsonify({traceback.format_exc()}),
        # a set literal, which jsonify cannot serialize — the error path
        # itself crashed. Return a JSON object instead. Also narrowed the
        # bare `except:` to `except Exception:`.
        return jsonify({"error": traceback.format_exc()})
def command_train(self):
    """Train the model if needed and hand the learned thetas to Predict.

    Returns:
        False when the trainer could not be initialised; otherwise None.
    """
    if self.train_class is None and self.init_train_class() is False:
        return False
    if self.train_class.thetas is None:
        thetas = self.train_class.train()
        if self.predict_class is None:
            # Plain attribute assignment instead of the original
            # __setattr__ indirection.
            self.predict_class = Predict(thetas)
        else:
            self.predict_class.thetas = thetas
    else:
        print("Already trained, ask me to PREDICT a price now!")
def main():
    """Dispatch to train / predict / predict_gui / tuning based on the mode.

    Returns:
        Whatever the selected stage produces.
    """
    project_parameters = ProjectParameters().parse()
    valid_modes = ['train', 'predict', 'predict_gui', 'tuning']
    assert project_parameters.mode in valid_modes, (
        'please check the mode argument.\nmode: {}\nvalid: {}'.format(
            project_parameters.mode, valid_modes))
    if project_parameters.mode == 'train':
        result = Train(project_parameters=project_parameters).train()
    elif project_parameters.mode == 'predict':
        # BUG FIX: the original built a throwaway Predict instance and
        # immediately overwrote it with a second one — construct it once.
        result = Predict(project_parameters=project_parameters).predict(
            filepath=project_parameters.root)
    elif project_parameters.mode == 'predict_gui':
        # Imported lazily so the GUI stack loads only when requested.
        from src.predict_gui import PredictGUI
        result = PredictGUI(project_parameters=project_parameters).run()
    elif project_parameters.mode == 'tuning':
        result = Tuning(project_parameters=project_parameters,
                        train_class=Train).tuning()
    return result
def main(project_parameters):
    """Run the pipeline stage selected by project_parameters.mode.

    Returns the stage's result, or None for stages that only have
    side effects (GUI, evaluate).
    """
    result = None
    mode = project_parameters.mode
    if mode == 'train':
        result = train(project_parameters=project_parameters)
    elif mode == 'evaluate':
        if project_parameters.predefined_dataset is not None:
            print('temporarily does not support predefined dataset.')
        else:
            evaluate(project_parameters=project_parameters)
    elif mode == 'predict':
        if project_parameters.use_gui:
            gui = GUI(project_parameters=project_parameters)
            gui.run()
        else:
            result = Predict(project_parameters=project_parameters)(
                data_path=project_parameters.data_path)
            # Print a comma-separated class header, then the scores.
            header = ('{},' * project_parameters.num_classes).format(
                *project_parameters.classes)[:-1]
            print(header)
            print(result)
    elif mode == 'tune':
        result = tune(project_parameters=project_parameters)
    return result
#! /usr/bin/python
"""Command-line entry point for training, verifying, and cleaning models."""
import argparse


def main():
    """Parse CLI flags and dispatch to the requested action."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-t', '--train', action='store_true',
                        help='train network')
    parser.add_argument('-o', '--oxford', action='store_true',
                        help='use Oxford-IIIT pet data set')
    parser.add_argument('-v', '--verify', action='store_true',
                        help='verify against test image')
    parser.add_argument('-c', '--clean', action='store_true',
                        help='clean up models')
    args = parser.parse_args()

    # Heavy project imports stay inside each branch so they load only
    # when their action is actually requested (as in the original).
    if args.train and args.oxford:
        from src.train import Train
        Train().oxford()
    elif args.verify:
        from src.predict import Predict
        Predict().segmentation()
    elif args.clean:
        from src.clean import Clean
        Clean().clean()
    else:
        parser.print_help()


# IMPROVEMENT: the original ran all of this at module import time;
# guarding with __main__ keeps script behavior identical while making
# the module importable without side effects.
if __name__ == "__main__":
    main()
def test_number_of_columns(self):
    """The prediction result frame must have exactly 24 columns."""
    frame = pd.read_csv(self.data_path, sep=";")
    result = Predict().execute(frame, self.model_data_path)
    self.assertEqual(24, len(result.columns))
def search(pred: Predict, query: str):
    """Ask the predictor for the top SEARCH_SIZE answers to *query*."""
    results = pred.search(query, n_answers=SEARCH_SIZE)
    return results
"""Streamlit front-end: query ElasticSearch + BERT and show the answers."""
import streamlit as st

from src.predict import Predict

SEARCH_SIZE = 10

pred = Predict()


@st.cache
def search(pred: Predict, query: str):
    """Ask the predictor for the top SEARCH_SIZE answers to *query*."""
    return pred.search(query, n_answers=SEARCH_SIZE)


if __name__ == "__main__":
    st.title("Query information retrieval featuring ElasticSearch and "
             "BERT!")
    query_input = st.text_input("What question do you want to ask?",
                                max_chars=100)
    if st.button('Look ! '):
        with st.spinner("Looking zzzzzz"):
            # BUG FIX: search() accepts only (pred, query); the original
            # passed batch_size=1, which raised TypeError on every click.
            res = search(pred, query_input)
        for n, doc in enumerate(res):
            st.markdown(f"*{n}*: **{doc.get('full_context')}**")
            st.markdown(f"confidence: {doc.get('confidence')}")
            st.write("\n")