def index_page(text="", prediction_message=""):
    """Main page of web app"""
    if request.method == "POST":
        clf = Classifier(DEFAULT_MODEL_PATH, DEFAULT_VECTORIZER_PATH,
                         DEFAULT_MLB_PATH)
        dialogue = request.form["text"]
        prediction = clf.predict(dialogue)
        prediction_message = " ".join(sorted(prediction))
    return render_template('prediction_page.html',
                           text=text,
                           prediction_message=prediction_message)
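# A minimal wiring sketch for the view above (assumed, not from the original
# source): it presumes Flask is the framework in use and that Classifier and
# the DEFAULT_* paths are importable from the surrounding module.
from flask import Flask

app = Flask(__name__)
app.add_url_rule('/', 'index_page', index_page, methods=['GET', 'POST'])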
def outputCsv(c: classifier.Classifier) -> None:
    filename = '/tmp/%s_%d%s.csv' % (FLAGS.model, FLAGS.limit,
                                     '_diff' if FLAGS.csv_diff_only else '')
    with open(filename, 'w') as csvFile:
        writer = csv.writer(csvFile)
        writer.writerow([
            'sharedId', 'sequence', 'training_labels', 'predicted_sure',
            'predicted_unsure', 'revised_training_labels'
        ])
        with sessionLock:
            samples: List[ClassificationSample] = list(
                ClassificationSample.query.find(
                    dict(model=FLAGS.model, use_for_training=True)).sort([
                        ('seqHash', -1)
                    ]).limit(FLAGS.limit))
            predicted = c.classify([s.seq for s in samples])
            for sample, pred in zip(samples, predicted):
                train_str = ';'.join(
                    [l.topic for l in sample.training_labels])
                sorted_pred: List[Tuple[str, float]] = sorted(
                    pred.items(), key=lambda e: -e[1])
                pred_sure_str = ';'.join(
                    [t for t, q in sorted_pred if q >= FLAGS.csv_sure])
                pred_unsure_str = ';'.join(
                    [t for t, q in sorted_pred if q < FLAGS.csv_sure])
                if not FLAGS.csv_diff_only or train_str != pred_sure_str:
                    writer.writerow([
                        sample.sharedId, sample.seq, train_str,
                        pred_sure_str, pred_unsure_str, ''
                    ])
    print('Wrote %s.' % filename)
def test_missing_instance_dir(self, fs: FakeFilesystem) -> None:
    fs.add_real_directory('./testdata/test_model/test_instance_unreleased')
    model_path = os.path.join(self.BASE_CLASSIFIER_PATH, 'test_model')
    with pytest.raises(
            Exception,
            match=('No valid instance of model found in %s, ' +
                   'instances were %s') %
            (model_path, r'\[\'test_instance_unreleased\'\]')):
        Classifier(self.BASE_CLASSIFIER_PATH, 'test_model')
def test_missing_labels_file(self, fs: FakeFilesystem) -> None:
    fs.add_real_directory('./testdata/test_model/test_instance')
    fs.remove_object('./testdata/test_model/test_instance/label.vocab')
    with pytest.raises(
            Exception,
            match=(r'Failure to load labels file from {0} with exception'
                   ).format('./testdata/test_model/test_instance/label.vocab')):
        Classifier(self.BASE_CLASSIFIER_PATH, 'test_model')
def test_classify(self, fs: FakeFilesystem) -> None:
    fs.add_real_directory('./testdata/test_model/test_instance')
    fs.add_real_directory('./testdata/test_model/test_instance_unreleased')
    c = Classifier('./testdata', 'test_model')
    result = c.classify(['Where is my medical book?'])
    assert c.vocab is not None
    assert c.embedder is not None
    assert c.predictor is not None
    assert c.instance == 'test_instance'
    print(result)
    assert result
    # result ~ [{topic: probability, topic2: probability, ...}, ...]
    for topic, _ in result[0].items():
        assert topic in c.vocab
    assert result[0]['Right to education'] >= 0.7
def test_classify(self, fs: FakeFilesystem) -> None:
    fs.add_real_directory('./testdata/test_model/test_instance')
    fs.add_real_directory('./testdata/test_model/test_instance_unreleased')
    c = Classifier('./testdata', 'test_model')
    result = c.classify(['Increase access to health care'])
    assert c.labels is not None
    assert c.embedder is not None
    assert c.predictor is not None
    assert c.instance == 'test_instance'
    assert result
    # result ~ [{topic: probability, topic2: probability, ...}, ...]
    for topic, _ in result[0].items():
        assert topic in c.labels
    assert len(result) == 1
    assert result[0]['Right to health'] >= 0.8
def test_missing_model(self, fs: FakeFilesystem) -> None:
    instance_path = os.path.join(self.BASE_CLASSIFIER_PATH, 'test_model',
                                 'test_instance_missing_model')
    fs.add_real_directory(instance_path)
    with pytest.raises(
            Exception,
            match=('SavedModel file does not exist at: {0}'
                   ).format(instance_path)):
        Classifier(self.BASE_CLASSIFIER_PATH, 'test_model')
def test_missing_variables(self, fs: FakeFilesystem) -> None:
    instance_path = os.path.join(self.BASE_CLASSIFIER_PATH, 'test_model',
                                 'test_instance_missing_variables')
    fs.add_real_directory(instance_path)
    with pytest.raises(
            Exception,
            match=('{0}/variables; No such file or directory'.format(
                instance_path))):
        Classifier(self.BASE_CLASSIFIER_PATH, 'test_model')
def test_invalid_bert(self, fs: FakeFilesystem) -> None:
    bad_bert_path = './bad/path/to/bert'
    config = """
    {
        "bert": "%s",
        "labels": "label.vocab",
        "is_released": true,
        "description": "This is the latest model from Sascha.",
        "metadata": {
            "thesaurus": "issues"
        }
    }
    """ % (bad_bert_path)
    fs.add_real_directory('./testdata/test_model/test_instance')
    fs.remove_object('./testdata/test_model/test_instance/config.json')
    fs.create_file('./testdata/test_model/test_instance/config.json',
                   contents=config)
    with pytest.raises(Exception,
                       match='SavedModel file does not exist at'):
        c = Classifier(self.BASE_CLASSIFIER_PATH, 'test_model')
        # Bad bert is only used on uncached embed.
        c.classify(['some string'])
def classifier():
    form = ClassifierForm()
    if form.validate_on_submit():
        text = form.index.data
        category = Classifier(form.index.data)
        articles = Articles.query.filter_by(topic=category).limit(10)
        # article = SimilarArticles.query.filter_by(id=form.index.data).first()
        return render_template('results.html',
                               title='Search Results',
                               category=category,
                               text=text,
                               articles=articles)
    return render_template('classifier.html',
                           title='Search Article',
                           form=form)
def __init__(self, base_classifier_dir: str, model_name: str = '') -> None:
    self.logger = logging.getLogger()
    self.base_classifier_dir = base_classifier_dir
    self.model_name = model_name
    self.classifier: Optional[Classifier] = None
    if self.model_name:
        try:
            self.classifier = Classifier(self.base_classifier_dir,
                                         model_name)
            self.topic_infos: Dict[str, ModelStatus.TopicStatus] = {}
            for t, ti in self.classifier.topic_infos.items():
                self.topic_infos[t] = ModelStatus.TopicStatus(t, ti)
        except Exception:
            self.logger.info('No model %s found in classifier directory=%s' %
                             (model_name, self.base_classifier_dir))
def outputCsv(c: classifier.Classifier) -> None:
    filename = './%s_%d%s.csv' % (FLAGS.model, FLAGS.limit,
                                  '_diff' if FLAGS.csv_diff_only else '')
    if FLAGS.csv:
        subset_seqs: List[str] = []
        with open(FLAGS.csv, 'r') as csvFile, sessionLock:
            for row in csv.DictReader(csvFile):
                subset_seqs.append(row[FLAGS.text_col])
        print(subset_seqs[:10])
    with open(filename, 'w') as csvFile:
        writer = csv.writer(csvFile)
        writer.writerow([
            'sharedId', 'sequence', 'training_labels', 'predictions',
            'probabilities'
        ])
        with sessionLock:
            samples: List[ClassificationSample] = list(
                ClassificationSample.query.find(
                    dict(model=FLAGS.model, use_for_training=False)).sort([
                        ('seqHash', -1)
                    ]).limit(FLAGS.limit))
            if FLAGS.csv:
                samples = [
                    s for s in samples
                    if any(x in s.seq for x in subset_seqs)
                ]
            predicted = c.classify([s.seq for s in samples])
            for sample, pred in zip(samples, predicted):
                training_labels = [l.topic for l in sample.training_labels]
                train_str = ';'.join(sorted(training_labels))
                sorted_pred: List[Tuple[str, float]] = sorted(pred.items())
                predictions = ';'.join([t for t, q in sorted_pred])
                probabilities = ';'.join([str(q) for t, q in sorted_pred])
                if not FLAGS.csv_diff_only or train_str != predictions:
                    writer.writerow([
                        sample.sharedId, sample.seq, train_str, predictions,
                        probabilities
                    ])
    print('Wrote %s.' % filename)
def call_cls(urls, callback, kws, labels):
    """
    Relays status and results between the classifier process, the main
    process, and the client (when used via the web interface).

    # Input:
    - urls (list): a list of urls to be classified.
    - callback (str): the callback url.
    - kws (list): list of pre-defined keywords in the database.
    - labels (list): list of pre-defined labels in the database.
    """
    context = zmq.Context()
    socket = context.socket(zmq.REP)
    socket.bind('tcp://*:{PORT}'.format(PORT=ZMQ_LISTENING_PORT))
    poller = zmq.Poller()
    poller.register(socket, zmq.POLLIN)
    print("calling classifier")
    msg = None
    while msg is None and callback is None:
        socks = dict(poller.poll())
        if socket in socks:
            msg = socket.recv()
    print("callback: ", callback)
    cls = Classifier(model=model)
    results = dict()
    if callback is None:
        url = urls[0]
        for status in cls.classify(url, kws, labels):
            print("status:", status)
            if type(status) == str:
                socket.send_string(json.dumps({'status': status, 'url': url}))
                if status == 'error':
                    break
                gevent.sleep(0.1)
                msg = socket.recv()
            else:
                socket.send_string(json.dumps(status))
    else:
        print("calculating for direct post")
        results = []
        for url in urls:
            for status in cls.classify(url, kws, labels):
                if type(status) == str:
                    data = json.dumps({'status': status + " in url " + url})
                    print("sending status to callback")
                    requests.post(callback, json=data)
                    if status == 'error':
                        results += [{'url': url, 'restrict': False,
                                     'reasons': ['error']}]
                        break
                else:
                    results += [status]
        # TODO call update db
        data = json.dumps({'sites': results})
        print("sending results to callback")
        requests.post(callback, json=data)
    sys.exit()
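# A minimal client-side sketch for the REP socket above (illustrative, not
# from the original source): a REQ socket must alternate send and recv, so
# the client kicks off the exchange and then acknowledges each status update.
# The port value here is an assumption and must match ZMQ_LISTENING_PORT.
import json
import zmq

ZMQ_LISTENING_PORT = 5555  # assumed; must match the server's port

context = zmq.Context()
socket = context.socket(zmq.REQ)
socket.connect('tcp://localhost:%d' % ZMQ_LISTENING_PORT)
socket.send(b'start')  # unblocks the server's initial poll/recv
update = json.loads(socket.recv())
while update.get('status') not in (None, 'error'):
    print(update)
    socket.send(b'ack')  # lets the server emit the next status message
    update = json.loads(socket.recv())
print('final message:', update)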
import uvicorn
from fastapi import FastAPI, File, UploadFile
from fastapi.responses import HTMLResponse

from app.classifier import Classifier

model = Classifier('./data/classfier.h5')
app = FastAPI()


@app.post('/predict')
def predict(image: UploadFile = File(...)):
    # predict label
    return model.predict_from_file(image.file)
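# A hedged usage sketch for the /predict endpoint above, assuming the app is
# served locally on the default uvicorn port (e.g. `uvicorn main:app`) and
# that cat.jpg is a placeholder image path.
import requests

with open('cat.jpg', 'rb') as f:
    resp = requests.post('http://localhost:8000/predict',
                         files={'image': f})
print(resp.json())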
import logging
from datetime import datetime

from flask import Blueprint, jsonify, request

from utils.config_parser import Config
from app.classifier import Classifier

logger = logging.getLogger(__name__)
api_controller = Blueprint('api_controller', __name__)
cl = Classifier.getInstance()


@api_controller.route('/ping', methods=['GET'])
def ping():
    return jsonify("pong", "%s" % datetime.now().isoformat())


@api_controller.route('/hs', methods=['POST'])
def post_hs():
    json = request.json
    if json is None:
        return "Bad request", 400
    if 'body' not in json:
        return "Body not found in json", 400
    text = str(json['body'])
    logger.debug("TEXT {}".format(text))
    results = cl.check(text)
    # Assumed completion: the original snippet ends without a return.
    return jsonify(results)
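# A hedged usage sketch for the /hs endpoint above, assuming the blueprint is
# registered on an app served at the default Flask address localhost:5000.
import requests

resp = requests.post('http://localhost:5000/hs',
                     json={'body': 'some text to check'})
print(resp.status_code, resp.json())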
class Main_1(tk.Frame):

    def __init__(self, parent, controller):
        tk.Frame.__init__(self, parent)
        self.file = None
        self.clf = Clf('./results/pascalvoc_A.pt')

        backButton = tk.Button(self, text='Back',
                               command=lambda: controller.show_frame(MainView))
        backButton.pack(side='top', fill='x')

        self.leftFrame = tk.Frame(self)
        self.leftFrame.pack(side="left", fill="both", expand=True)
        self.rightFrame = tk.Frame(self)
        self.rightFrame.pack(side="right", fill="both", expand=True)

        self.imgPanel = tk.Label(self.leftFrame,
                                 text=str(self.file or 'No file uploaded'))
        self.imgPanel.pack(side="left", fill="both", expand=True)

        self.results = tk.Label(self.rightFrame,
                                text='Prediction:\n {}'.format(None),
                                anchor='e')
        self.results.config(font=('Arial', 14))
        self.results.pack(side='top', fill='y', expand=True)

        openFile = tk.Button(self.rightFrame, text="Open an Image",
                             command=self.uploadFile)
        openFile.config(bg='#8e8d8d', font=('Arial', 14))
        openFile.pack(side='bottom', fill='both', expand=True)

    def predict(self, img_path):
        self.results.config(text='Prediction:\n {}'.format('Calculating!!!'))
        self.results.update_idletasks()
        results = self.clf.predict(img_path)
        results_string = '\n'.join(results)
        if not results_string:
            results_string = None
        self.results.config(text='Prediction:\n {}'.format(results_string))
        self.results.update_idletasks()
        return

    def uploadFile(self):
        f = filedialog.askopenfilename()
        self.file = f
        _, ext = os.path.splitext(f)
        valid_ext = ['.png', '.jpg', '.jpeg', '.JPG', '.PNG']
        if ext not in valid_ext:
            self.imgPanel.configure(text='Invalid File Type: {}'.format(ext),
                                    image='')
            self.imgPanel.image = None
            self.imgPanel.update_idletasks()
            return
        else:
            to_predict = ImageTk.PhotoImage(pad_and_resize(f, 400))
            self.imgPanel.config(image=to_predict)
            self.imgPanel.image = to_predict
            self.imgPanel.update_idletasks()
            self.predict(f)
def test_missing_model_dir(self) -> None:
    with pytest.raises(
            Exception,
            match='Invalid model path: ./testdata/missing_model'):
        Classifier(self.BASE_CLASSIFIER_PATH, 'missing_model')
print "%d republican speeches" % len(rep_speeches) print "%d democratic speeches" % len(dem_speeches) # ipdb.set_trace() # bayseian_prior_a_rep = len(rep_speeches) / len(speeches) # bayseian_prior_b_dem = len(dem_speeches) / len(speeches) # this frame vocabulary proba has tuples for the proba of class a and b # frame_vocabulary_proba = { word: vocabulary_proba[word] if vocabulary_proba.get(word) != None else [0, 0] for word in frame.word_string.split() } # sum_log_probability_a_rep = sum(map(lambda (word,log_probabilities): log_probabilities[0],frame_vocabulary_proba.items())) # sum_log_probability_b_dem = sum(map(lambda (word,log_probabilities): log_probabilities[1],frame_vocabulary_proba.items())) # final_prob_a = bayseian_prior_a_rep * sum_log_probability_a_rep # final_prob_b = bayseian_prior_b_dem * sum_log_probability_b_dem print "Recompute Naieve Bayes Output For Classifying Frame (%s) Within Window (%s) for phrase %s" % (frame.seed_word, speech_window_key, analysis.phrase) naive_bayes = Classifier(vocab=frame.word_string.split()) training_set = Classifier.bunch_with_targets(speeches, analysis.target_function2) naive_bayes.train_classifier(training_set.data, training_set.target) probabilities = naive_bayes.classify_document(frame.word_string) tfidf_frames_vector = naive_bayes.vectorizer.transform([frame.word_string]) print "Predicted Class: ", naive_bayes.classifier.predict(tfidf_frames_vector)[0] print "Predict Proba: ", naive_bayes.classifier.predict_proba(tfidf_frames_vector)[0] print "Probability A (Rep): ", probabilities[0] print "Probability B (Dem): ", probabilities[1] if probabilities[0] > probabilities[1]: print t.red("A (Rep) NB Proba > B (Dem) NB Proba: Classify Republican") else:
from app.classifier import Classifier

Classifier()
# Entry point used when launching with Gunicorn.
# Reads the saved trained parameters and builds the Classifier and the
# Flask app.
# (The parameter file path is passed via an environment variable, not as an
# argument.)
# wsgi.py
import torch
from smart_getenv import getenv

from app import create_app
from app.classifier import Classifier

# Get the parameter file path from an environment variable
prm_file = getenv("PRM_FILE", default="taco_burrito.prm")

# Load the parameter file
params = torch.load(prm_file, map_location=lambda storage, loc: storage)

# Create the Classifier and the Flask application
classifier = Classifier(params)
app = create_app(classifier)
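# Usage sketch (assumed, not from the original source): with this module
# saved as wsgi.py, the app can be served with, for example:
#   PRM_FILE=taco_burrito.prm gunicorn wsgi:app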
# Build Targets
print("Build target vector and data vector from documents")


def party_fn(speech):
    if speech.speaker_party == 'D':
        return 1
    elif speech.speaker_party == 'R':
        return 0
    else:
        raise Exception("Speech must have party 'D' or 'R': " +
                        str(speech.speech_id))


bunch = Classifier.bunch_with_targets(speeches=speeches,
                                      target_function=party_fn)
data = vectorizer.fit_transform(bunch.data)  # .tocsr()  # .toarray()

# Print Stuff
learned_vocabulary = vectorizer.get_feature_names()
print("Learned %d words in vocabulary" % len(learned_vocabulary))
print(learned_vocabulary)
print("")
print("Sparse Matrix of TfIdf Values of each term for each document")
print(data)
target = array(bunch.target)
print("")

# Run Cross Validation Checks
def test_missing_base_classify_dir(self) -> None:
    fake_classifier_path = './fake_testdata'
    with pytest.raises(
            Exception,
            match='Invalid base_classifier_dir: ./fake_testdata'):
        Classifier(fake_classifier_path, 'test_model')