def main():
    args = parse_args()
    ctx = mx.gpu(args.gpu)
    load_classifier('model/ali', ctx)
    rgb_mean = [0, 0, 0]
    while True:
        try:
            if not runnable:
                logger.info('EXIT NOW!')
                break
            data = None
            for priority in range(COUNT_PRIORITY_QUEUES):
                queue = 'ALI_CLASSIFICATION_TEST_INPUT_{}'.format(priority)
                data = dequeue(queue, False)
                if data:
                    logger.info('Dequeued from {}: {}'.format(queue, data))
                    break
            if not data:
                time.sleep(0.1)
                continue
            image_key = data['image_file']
            context = data.get('context', None)
            image_name = data.get('image_name', '')
            output_queue = data.get('output_queue', '')
            from_detectron = data.get('from_detectron', False)
            others_threshold = data.get('others_threshold', 0.7)
            save_root = data.get('save_root', '')
            save_folder = data.get('save_folder', '')
            logger.info('Processing {}...'.format(image_key))
            result = {'context': context, 'enqueue_at': time.time()}
            im = image_store.get_as_image(image_key)
            if im is None:
                continue
            patches = generate_patches(im, rgb_mean)
            cls_name, score = classify_patches(patches, 128, others_threshold)
            score = np.asscalar(score)
            save_patch(save_root, save_folder, cls_name, score, image_name)
            result['class_detections'] = {'image': image_name}
            logger.info('Enqueued to {}: {}'.format(output_queue, result))
            enqueue(output_queue, result)
        except Exception as e:
            logger.error(e, exc_info=True)
def test_load(self):
    clf_loaded = load_classifier()
    y_pred_loaded = clf_loaded.predict(self.X_test)
    y_pred = self.clf.predict(self.X_test)
    mean1 = np.mean(y_pred_loaded == self.y_test)
    mean2 = np.mean(y_pred == self.y_test)
    self.assertEqual(mean1, mean2)
def classify_tweet():
    text = request.args.get('text', 'God is good', type=str)
    clf = request.args.get('clf', 'Maximum Enthropy', type=str)
    classifier = act.load_classifier(clf)
    text = act.processTweet(text)
    text_sentiment = act.get_sentiment(text, classifier)
    return jsonify(sentiment=text_sentiment)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-model', type=str, help='Classifier model path')
    parser.add_argument('-img', type=str, help='Image path')
    parser.add_argument('-name', type=str, default=None, help='Cat-to-name file path')
    parser.add_argument('-mean', type=float, default=[0.485, 0.456, 0.406], help='Dataset mean')
    parser.add_argument('-std', type=float, default=[0.229, 0.224, 0.225], help='Dataset standard deviation')
    parser.add_argument('-topk', type=int, default=5, help='# predictions to display')
    parser.add_argument('-device', type=str, default='cpu', choices=['cpu', 'cuda'], help='Device')
    args = parser.parse_args()

    classifier = load_classifier(args.model)
    image = process_img(args.img, args.mean, args.std)
    probs, cats = predict(image, classifier, args.device, args.topk)
    show_prediction(args.img, probs, cats, args.name)
def predict_imgs():
    # Python 2 code: print is a statement and map()/filter() return lists.
    imgs = get_imgs()
    results = []
    for img in imgs:
        rect = img[1]
        filename = img[0]
        img_class = img[2]
        x, y, w, h = rect
        img = cv2.imread(filename)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = img[y:y + h, x:x + w]  # get only relevant portion of the image
        results = results + \
            [[img_class, colorsys.hsv_to_rgb(get_dominant_color(img) / 360., 1, 0.5)]]
    x = map(lambda xi: xi[1], results)
    y = map(lambda yi: yi[0], results)
    clf = classifier.load_classifier()
    predictions = map(lambda x: classifier.reverse_lookup(x), clf.predict(x))
    predictions = map(lambda i: [predictions[i], y[i], results[i]], range(len(y)))
    failed_predictions = filter(lambda x: x[0] != x[1], predictions)
    print failed_predictions
    print 'accuracy', 1 - float(len(failed_predictions)) / len(predictions)
def test_predict_probability(self):
    clf_loaded = load_classifier()
    email = ("Dear Winner, we wish to congratulate and inform you that your email address "
             "has won ($2,653,000 two million six hundred and fifty three thousand US Dollars)")
    p = clf_loaded.predict_proba([email])
    p = round(p[0][1], 2)
    print("The predicted probability of the email being spam is:", p)
    self.assertGreaterEqual(p, 0)
    self.assertLessEqual(p, 1)
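# For context, a minimal sketch of the kind of load_classifier/save_classifier pair the
# two tests above assume: a fitted scikit-learn pipeline persisted with joblib.
# This is illustrative only; the helper names and MODEL_PATH are assumptions, not the
# original projects' code.
import joblib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline

MODEL_PATH = 'classifier.joblib'  # hypothetical default location


def save_classifier(clf, path=MODEL_PATH):
    # Persist the fitted pipeline to disk.
    joblib.dump(clf, path)


def load_classifier(path=MODEL_PATH):
    # Restore the previously fitted pipeline.
    return joblib.load(path)


if __name__ == '__main__':
    # Tiny round-trip demo on toy data.
    texts = ['win a free prize now', 'meeting at 10am tomorrow']
    labels = [1, 0]  # 1 = spam, 0 = ham
    clf = make_pipeline(TfidfVectorizer(), LogisticRegression())
    clf.fit(texts, labels)
    save_classifier(clf)
    restored = load_classifier()
    print(restored.predict_proba(['free prize'])[0][1])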
def predict(filename):
    img = cv2.imread(filename)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # plt.imshow(img)
    # plt.show()
    result = [colorsys.hsv_to_rgb(get_dominant_color(img) / 360., 1, 0.5)]
    clf = classifier.load_classifier()
    prediction = classifier.reverse_lookup(clf.predict(result)[0])
    print prediction
def main():
    print('Loading the model...')
    model = classifier.load_classifier(model=MODEL,
                                       parquet=TRAINING_PARQUET,
                                       training_set=TRAINING_SET)
    print('Running Consumer...')
    try:
        consumer = kafka.connectConsumer(topic=TOPIC_INPUT, server=KAFKA_BROKER_URL)
        print("Consumer connected")
    except Exception as ex:
        print("Error connecting kafka broker as Consumer")
        print(ex)
    try:
        producer = kafka.connectProducer(server=KAFKA_BROKER_URL)
        print("Producer connected")
    except Exception as ex:
        print("Error connecting kafka broker as Producer")
        print(ex)

    working = True
    while working:
        message_dict = kafka.consume(consumer=consumer)
        if message_dict != {}:
            for topic, messages in message_dict.items():
                for message in messages:
                    print('Received message: ' + str(message.value['domain']))
                    domain = message.value['domain']
                    domain_clusters = cluster_utils.parse_cluster(domain, message.value['TaggedClusters'])
                    filtered_list = []
                    for page_dict in domain_clusters:
                        label = page_dict['cluster_label']
                        if label == 'product':
                            page_text = page_dict['text']
                            prediction = classifier.predict(model=model, input=page_text)
                            if prediction == [1]:
                                filtered_list.append(page_dict)
                        else:
                            filtered_list.append(page_dict)
                    content = {'domain': domain, 'filtered_pages': filtered_list}
                    content_json = json.dumps(content)
                    mongo.put(domain, content_json)
                    print('Data saved on db: collection: ' + str(domain))
                    kafka.send_message(producer=producer, topic=TOPIC_OUTPUT, message=content)
def main():
    print('Loading the model...')
    model = classifier.load_classifier(model=MODEL,
                                       parquet=TRAINING_PARQUET,
                                       training_set=TRAINING_SET)
    print('Running Consumer...')
    try:
        partitioner = kafka.get_RoundRobin_partitioner_for_topic(TOPIC_OUTPUT, KAFKA_BROKER_URL)
    except Exception as ex:
        print('Error with topic partitions')
        print(ex)
    try:
        consumer = kafka.connectConsumer(topic=TOPIC_INPUT, server=KAFKA_BROKER_URL)
        print("Consumer connected")
    except Exception as ex:
        print("Error connecting kafka broker as Consumer")
        print(ex)
    try:
        producer = kafka.connectProducer(server=KAFKA_BROKER_URL, partitioner=partitioner)
        print("Producer connected")
    except Exception as ex:
        print("Error connecting kafka broker as Producer")
        print(ex)

    i = 0
    working = True
    while working:
        message_dict = kafka.consume(consumer=consumer)
        if message_dict != {}:
            for topic, messages in message_dict.items():
                for message in messages:
                    if classifier.predict(model=model, input=message.value['url_page']) == 1:
                        collection = 'Classifier'
                        mongo.put(collection, json.dumps(message.value))
                        print('Data saved on db: collection: ' + str(collection) +
                              ' url: ' + message.value['url_page'])
                        kafka.send_message(producer=producer, key=i,
                                           topic=TOPIC_OUTPUT, message=message.value)
                        i = i + 1
def check():
    s = classifier.load_classifier('100x25')
    print(s)
def run_master(mode, args):
    if mode not in ['train', 'continue'] and args.restore != '':
        aa = args.restore.split('/')
        bb = []
        for a in aa:
            if len(a) > 0:
                bb.append(a)
        fld = '/'.join(bb[:-1])
        if mode in ['vali', 'vis']:
            vocab_only = False
            fld_data, _, _ = get_model_fld(args)
            path_bias_vocab = fld_data + '/vocab_bias.txt'
        else:
            vocab_only = True
            fld_data = fld
            path_bias_vocab = fld + '/vocab_bias.txt'
    else:
        vocab_only = False
        fld_data, fld_model, subfld = get_model_fld(args)
        fld = fld_model + '/' + subfld
        path_bias_vocab = fld_data + '/vocab_bias.txt'

    if os.path.exists(path_bias_vocab):
        allowed_words = [
            line.strip('\n').strip('\r')
            for line in open(path_bias_vocab, encoding='utf-8')
        ]
    else:
        allowed_words = None

    model_class = args.model_class.lower()
    if model_class.startswith('fuse'):
        Master = StyleFusion
    elif model_class == 'mtask':
        Master = VanillaMTask
    elif model_class == 's2s':
        Master = Seq2Seq
    elif model_class == 'lm':
        Master = LanguageModel
    elif model_class == 's2s+lm':
        pass
    else:
        raise ValueError

    if model_class == 's2s+lm':
        master = Seq2SeqLM(args, allowed_words)
    else:
        dataset = Dataset(fld_data,
                          max_ctxt_len=args.max_ctxt_len,
                          max_resp_len=args.max_resp_len,
                          vocab_only=vocab_only,
                          noisy_vocab=args.noisy_vocab)
        master = Master(dataset, fld, args,
                        new=(mode == 'train'),
                        allowed_words=allowed_words)

    if mode != 'train':
        if args.restore.endswith('.npz') or model_class == 's2s+lm':
            restore_path = args.restore
        else:
            restore_path = master.fld + '/models/%s.npz' % args.restore
        master.load_weights(restore_path)

    if mode in ['vis', 'load']:
        return master

    if args.clf_name.lower() == 'holmes':
        CLF_NAMES = [
            'classifier/Reddit_vs_Holmes/neural',
            'classifier/Reddit_vs_Holmes/ngram'
        ]
    elif args.clf_name.lower() == 'arxiv':
        CLF_NAMES = [
            'classifier/Reddit_vs_arxiv/neural',
            'classifier/Reddit_vs_arxiv/ngram'
        ]
    else:
        CLF_NAMES = [args.clf_name]
    print('loading classifiers ' + str(CLF_NAMES))
    master.clf_names = CLF_NAMES
    master.classifiers = []
    for clf_name in CLF_NAMES:
        master.classifiers.append(load_classifier(clf_name))

    print('\n' + fld + '\n')
    if mode in ['continue', 'train']:
        ss = ['', mode + ' @ %i' % time.time()]
        for k in sorted(args.__dict__.keys()):
            ss.append('%s = %s' % (k, args.__dict__[k]))
        with open(master.fld + '/args.txt', 'a') as f:
            f.write('\n'.join(ss) + '\n')

    if args.debug:
        batch_per_load = 1
    else:
        if PHILLY:
            n_sample = 1280  # philly unstable for large memory
        else:
            n_sample = 2560
        batch_per_load = int(n_sample / BATCH_SIZE)

    if mode == 'continue':
        master.vali()
        master.train(batch_per_load)
    elif 'summary' == mode:
        print(master.model.summary())
    elif mode in ['cmd', 'test', 'vali']:
        classifiers = []
        for clf_name in CLF_NAMES:
            classifiers.append(load_classifier(clf_name))
        if 'vali' == mode:
            data = master.get_vali_data()
            s_decoded = eval_decoded(master, data,
                                     classifiers=classifiers,
                                     corr_by_tgt=True,
                                     r_rand=args.r_rand,
                                     calc_retrieval=('holmes' in args.data_name.lower()))[0]
            s_surrogate = eval_surrogate(master, data)[0]
            print(restore_path)
            print()
            print(s_decoded)
            print()
            print(s_surrogate)
            return

        """
        if model_class != 's2s+lm':
            with tf.variable_scope('base_rankder', reuse=tf.AUTO_REUSE):
                fld_base_ranker = 'restore/%s/%s/pretrained/' % (
                    args.model_class.replace('fuse1', 'fuse'), args.data_name)
                dataset_base_ranker = Dataset(fld_base_ranker,
                                              max_ctxt_len=args.max_ctxt_len,
                                              max_resp_len=args.max_resp_len,
                                              vocab_only=True,
                                              noisy_vocab=False)
                base_ranker = Master(dataset_base_ranker, fld_base_ranker, args,
                                     new=False, allowed_words=master.allowed_words)
                path = fld_base_ranker + '/' + \
                    open(fld_base_ranker + '/base_ranker.txt').readline().strip('\n')
                base_ranker.load_weights(path)
                print('*' * 10 + ' base_ranker loaded from: ' + path)
        else:
        """
        base_ranker = None

        def print_results(results):
            ss = [
                'total', 'logP', 'logP_c', 'logP_b', 'rep', 'len',
            ] + ['clf%i' % i for i in range(len(CLF_NAMES))]
            print('; '.join([' ' * (6 - len(s)) + s for s in ss]))
            for score, resp, terms in results:
                print('%6.3f; ' % score + '; '.join(['%6.3f' % x for x in terms]) + '; ' + resp)

        if 'cmd' == mode:
            while True:
                print('\n---- please input ----')
                inp = input()
                infer_args = parse_infer_args()
                if inp == '':
                    break
                results = infer_rank(inp, master, infer_args, base_ranker=base_ranker)
                print_results(results)
        elif 'test' == mode:
            infer_args = parse_infer_args()
            path_out = args.path_test + '.hyp'
            open(path_out, 'w', encoding='utf-8')
            for line in open(args.path_test, encoding='utf-8'):
                line = line.strip('\n')
                inp = line.split('\t')[0]
                results = infer_rank(inp, master, infer_args, base_ranker=base_ranker)
                lines = []
                for _, hyp, _ in results[:min(10, len(results))]:
                    lines.append(line + '\t' + hyp.replace(' _EOS_', '').strip())
                with open(path_out, 'a', encoding='utf-8') as f:
                    f.write('\n'.join(lines) + '\n')
            """
            path_in = DATA_PATH + '/test/' + args.test_fname
            if not PHILLY:
                fld_out = master.fld + '/eval2/'
            else:
                fld_out = OUT_PATH
            makedirs(fld_out)
            npz_name = args.restore.split('/')[-1].replace('.npz', '')
            path_out = fld_out + '/' + args.test_fname + '_' + npz_name
            test_master(master, path_in, path_out,
                        max_n_src=args.test_n_max,
                        base_ranker=base_ranker,
                        baseline=args.baseline,
                        r_rand=args.r_rand)
            """
    else:
        raise ValueError
# Function header inferred from the fragment below: it computes the Jaccard
# index TP / (TP + FN + FP) between a ground-truth mask and a prediction.
def jaccard_index(y_true, y):
    TP = np.sum(y_true * y)
    FN = np.sum(y_true * (1. - y))
    FP = np.sum((1 - y_true) * y)
    if FN + TP + FP > 0:
        return TP / (FN + TP + FP)
    else:
        return 1.


##############################################################################
# Histogram over X_val
##############################################################################
from classifier import load_classifier

model = load_classifier('/home/carmelocuenca/tmp/hdf5s/' + grid_search[-1][2] + '.hdf5')
y_pred = model.predict(X_val, batch_size=16, verbose=0)

# the histogram of the data
n, bins, patches = plt.hist(y_pred.ravel(), 50, density=True, facecolor='g', alpha=0.75)
plt.xlabel('Probabilidad')
plt.ylabel('% Densidad de probabilidad')
plt.title('Histogram')
plt.axis([0, 1., 0., 5 + np.max(n[1:])])
plt.grid(True)
plt.show()
def use_model(id):
    global classifier_clf
    classifier_clf = load_classifier(id)
    return render_template('/model/use-model.html')
def build_combined(self, classifier_path=None, classifier_weight=None):
    optimizer = Adam(0.0002, 0.5)
    self.d_N.compile(loss='mse', optimizer=optimizer, metrics=['accuracy'])
    self.d_P.compile(loss='mse', optimizer=optimizer, metrics=['accuracy'])

    # Input images from both domains
    img_N = Input(shape=self.img_shape)
    img_P = Input(shape=self.img_shape)

    # Translate images to the other domain
    fake_P = self.g_NP(img_N)
    fake_N = self.g_PN(img_P)

    # Translate images back to original domain
    reconstr_N = self.g_PN(fake_P)
    reconstr_P = self.g_NP(fake_N)

    # Identity mapping of images
    img_N_id = self.g_PN(img_N)
    img_P_id = self.g_NP(img_P)

    # For the combined model we will only train the generators
    self.d_N.trainable = False
    self.d_P.trainable = False

    # Discriminators determine validity of translated images
    valid_N = self.d_N(fake_N)
    valid_P = self.d_P(fake_P)

    if classifier_path is not None and os.path.isfile(classifier_path):
        self.classifier = load_classifier(classifier_path, self.img_shape)
        self.classifier._name = "classifier"
        self.classifier.trainable = False
        class_N_loss = self.classifier(fake_N)
        class_P_loss = self.classifier(fake_P)

        # Combined model trains generators to fool discriminators
        self.combined = Model(inputs=[img_N, img_P],
                              outputs=[valid_N, valid_P,
                                       class_N_loss, class_P_loss,
                                       reconstr_N, reconstr_P,
                                       img_N_id, img_P_id])
        self.combined.compile(
            loss=['mse', 'mse', 'mse', 'mse', 'mae', 'mae', 'mae', 'mae'],
            loss_weights=[1, 1,
                          classifier_weight, classifier_weight,
                          self.lambda_cycle, self.lambda_cycle,
                          self.lambda_id, self.lambda_id],
            optimizer=optimizer)
    else:
        # Combined model trains generators to fool discriminators
        self.combined = Model(inputs=[img_N, img_P],
                              outputs=[valid_N, valid_P,
                                       reconstr_N, reconstr_P,
                                       img_N_id, img_P_id])
        self.combined.compile(
            loss=['mse', 'mse', 'mae', 'mae', 'mae', 'mae'],
            loss_weights=[1, 1,
                          self.lambda_cycle, self.lambda_cycle,
                          self.lambda_id, self.lambda_id],
            optimizer=optimizer)
def getresult():
    checked_url = False
    try:
        # RECEIVE DATA FROM INDEX WEB PAGE
        if 'noTweets' in request.form:
            no_tweets = request.form['noTweets']
        else:
            checked_url = True  # Used to determine if I used the initial form or the advanced options
            no_tweets = 50
        if 'selClf' in request.form:
            name_clf = request.form['selClf']
        else:
            name_clf = 'Maximum Enthropy'
        if 'advQuery' in request.form:
            query = request.form['advQuery']
        else:
            query = request.form['search']
        if 'url' in request.form:
            checked_url = True

        # GET USER ACCESS KEYS
        token = session['twitter_token'][0]
        token_secret = session['twitter_token'][1]

        # GET TWEETS DATA FRAME BASED ON SEARCH QUERY
        # data = get_data(query, token=token, token_secret=token_secret)
        print 'Loading ' + name_clf + ' classifier...'
        clf = act.load_classifier(classifier=name_clf)
        print "Classifier completely loaded"
        print 'Connecting to Twitter...'
        print 'Getting ' + str(no_tweets) + ' tweets'
        twitter_result = search_twitter(query, no_tweets=no_tweets,
                                        token=token, token_secret=token_secret)
        print str(no_tweets) + ' Tweets Retrieved Successfully!'
        if checked_url:
            print 'Removing Tweets containing URL...'
            twitter_result = act.removeTweetsWithUrl(twitter_result)
            print 'Removed tweets containing URL'
        print 'Trying to create a Pandas DataFrame...'
        print 'Classifying Tweets...'
        data = act.toDataFrame(twitter_result, clf)
        print 'Tweets classified Correctly!'
        print 'Finishing up the DataSet'
        data = act.post_dataset(data)
        print 'DONE with dataset'
        script_bar, div_bar, all_data, piedata, freq, no_positive, no_negative = performComputation(data)
        top_10_tweets = get_top_tweets(all_data)
        resp = make_response(
            render_template('result.html',
                            tot_tweets=no_tweets,
                            no_positive=no_positive,
                            no_negative=no_negative,
                            div_bar=div_bar,
                            script_bar=script_bar,
                            piedata=piedata,
                            freq=freq,
                            topTweets=top_10_tweets,
                            full_data=all_data,
                            pd=pd))
        return resp
    except Exception as e:
        print e
        return render_template('error.html', error=e)
def main():
    args = parse_args()
    ctx = mx.gpu(args.gpu)
    # load_classifiers('model', ctx)
    load_classifier('model/bottle', ctx)
    # load_classifier('model/box', ctx)
    rgb_mean = [0, 0, 0]
    while True:
        try:
            if not runnable:
                logger.info('EXIT NOW!')
                break
            data = None
            for priority in range(COUNT_PRIORITY_QUEUES):
                queue = 'SNAPSHOT_CLASSIFICATION_INPUT_{}'.format(priority)
                data = dequeue(queue, False)
                if data:
                    logger.info('Dequeued from {}: {}'.format(queue, data))
                    break
            if not data:
                time.sleep(0.1)
                continue
            image_key = data['image_file']
            bboxes = data['bboxes']
            context = data.get('context', None)
            classification_type = data.get('type', 1)  # 1: bottle, 2: box
            output_queue = data['output_queue']
            from_detectron = data.get('from_detectron', False)
            others_threshold = data.get('others_threshold', 0.7)
            logger.info('Processing {}...'.format(image_key))
            result = {'context': context, 'enqueue_at': time.time()}
            im = image_store.get_as_image(image_key)
            if im is None:
                continue
            if len(bboxes) == 0:
                result['detections'] = []
            else:
                if not from_detectron:
                    # Results from the annotation tool
                    rotate = data.get('rotate', 0)
                    if rotate != 0:
                        rot_mat = cv2.getRotationMatrix2D((im.shape[1] / 2, im.shape[0] / 2), rotate, 1)
                        im = cv2.warpAffine(im, rot_mat, (im.shape[1], im.shape[0]))
                    bboxes_ = []
                    for bbox in bboxes:
                        bbox[0] = min(max(bbox[0], 0), im.shape[1])
                        bbox[1] = min(max(bbox[1], 0), im.shape[0])
                        bbox[2] = min(max(bbox[2], 0), im.shape[1])
                        bbox[3] = min(max(bbox[3], 0), im.shape[0])
                        if bbox[2] - bbox[0] > 0 and bbox[3] - bbox[1] > 0:
                            bboxes_.append(bbox)
                    bboxes = bboxes_
                    patches = []
                    patches += generate_patches(im, rgb_mean, bboxes)
                    if H_FLIP_AUG:
                        patches += generate_patches_h_flip(im, rgb_mean, bboxes)
                    if SCALE_720_AUG:
                        patches += generate_patches_resize(im, rgb_mean, bboxes, 720)
                    cls_names, scores = classify_patches(patches, classification_type, 128, others_threshold)
                    count_groups = len(patches) / len(bboxes)
                    if count_groups > 1:
                        cls_names, scores = merge_groups(cls_names, scores, count_groups)
                    detections = []
                    for i, bbox in enumerate(bboxes):
                        detections.append(bbox + [cls_names[i]] + [np.asscalar(scores[i])])
                    result['detections'] = detections
                else:
                    # Further classification of Detectron detections
                    patches = []
                    patches += generate_patches(im, rgb_mean, bboxes)
                    if H_FLIP_AUG:
                        patches += generate_patches_h_flip(im, rgb_mean, bboxes)
                    if SCALE_720_AUG:
                        patches += generate_patches_resize(im, rgb_mean, bboxes, 720)
                    cls_names, scores = classify_patches(patches, classification_type, 128, others_threshold)
                    count_groups = len(patches) / len(bboxes)
                    if count_groups > 1:
                        cls_names, scores = merge_groups(cls_names, scores, count_groups)
                    cls_dets = {}
                    for i, bbox in enumerate(bboxes):
                        score = np.asscalar(scores[i])
                        cls_dets.setdefault(cls_names[i], []).append(bbox + [score])
                        if score < 0.95:
                            save_patch(image_key, im, bbox, cls_names[i], score)
                    # if classification_type != 1 and '其他' in cls_dets:
                    #     # For boxes, filter out the '其他' (others) class: very likely a false detection
                    #     del cls_dets['其他']
                    result['class_detections'] = cls_dets
            logger.info('Enqueued to {}: {}'.format(output_queue, result))
            enqueue(output_queue, result)
        except Exception as e:
            logger.error(e, exc_info=True)