def ejer5_loss(loss_fun, act_fun_last, label, nfig1, nfig2):
    print(label)
    reg1 = regularizador.L2(0.1)
    reg2 = regularizador.L2(0.1)
    proto = clasificador.Classifier(epochs=300, batch_size=50, eta=0.001)
    outputfile = "ejer5_" + label + "_v3.dat"
    (x_train, y_train), (x_test, y_test) = datasets.cifar10.load_data()
    mean_train = x_train.mean()
    n_clasifi = 10
    X, Y = clasificador.flattening(x_train, y_train, n_clasifi, mean_train)
    X_test, Y_test = clasificador.flattening(x_test, y_test, n_clasifi, mean_train)
    proto.fit(X, Y, X_test, Y_test,
              act_function1=act.ReLU(0), reg1=reg1,
              loss_function=loss_fun, act_function2=act_fun_last, reg2=reg2)
    # plt.figure(nfig1)
    # plt.ylabel("Accuracy [%]")
    # plt.plot(proto.acc_vect, label="Entrenamiento", c='red', alpha=0.6, ls='--')
    # plt.plot(proto.pres_vect, label="Validación", c='blue', alpha=0.6)
    # plt.legend(loc=0)
    # plt.savefig("ejer5_acc_" + label + ".pdf")
    # plt.figure(nfig2)
    # plt.ylabel("Pérdida")
    # plt.plot(proto.loss_vect, label="Entrenamiento", c='red', alpha=0.6, ls='--')
    # plt.plot(proto.loss_test, label="Validación", c='blue', alpha=0.6)
    # plt.legend(loc=0)
    # plt.savefig("ejer5_loss_" + label + ".pdf")
    # plt.show()
    # plt.close()
    # plt.clf()
    np.savetxt(outputfile,
               np.array([proto.acc_vect, proto.pres_vect,
                         proto.loss_vect, proto.loss_test]).T)
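# Hedged usage sketch for ejer5_loss: the loss/last-activation class names below
# (loss.CrossEntropy, loss.MSE, act.Sigmoid, act.Linear) are assumptions for
# illustration only; the snippet itself only shows the modules `clasificador`,
# `regularizador` and `act`.
# ejer5_loss(loss.CrossEntropy(), act.Sigmoid(), "cross_sigmoid", nfig1=1, nfig2=2)
# ejer5_loss(loss.MSE(), act.Linear(), "mse_linear", nfig1=3, nfig2=4)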
def evaluate_classifier(model_filename):
    with tf.Graph().as_default():
        classifier_net = classifier.Classifier()
        classification_model = classifier_net.get_model(122, 122)
        classification_model.load(model_filename)
        annotations = data.load_annotations()
        image_list = data.create_image_list(annotations)
        ok = 0
        n = 0
        with open(predicted_annotations_path) as data_file:
            bounding_box_data = json.load(data_file)
            # bounding_box_data = data.load_annotations()
        for filepath in image_list:
            x = int(bounding_box_data[filepath][0])
            w = int(bounding_box_data[filepath][1])
            y = int(bounding_box_data[filepath][2])
            h = int(bounding_box_data[filepath][3])
            # Extend the bounding box by 10% on each side
            x -= int(0.1 * w)
            w += int(0.2 * w)
            y -= int(0.1 * h)
            h += int(0.2 * h)
            crop = cv2.imread(data.get_image_path(filepath))
            if crop is None:  # unreadable image; must be checked before slicing
                continue
            crop = crop[y:y + h, x:x + w]
            height, width, _ = crop.shape
            if height == 0 or width == 0:
                continue
            crop = cv2.resize(crop, (122, 122))
            classification = classification_model.predict([crop])[0]
            if data.classes[np.argmax(classification)] == data.get_image_label(filepath):
                ok += 1
                # print(data.classes[np.argmax(classification)] + " " + data.get_image_label(filepath))
            n += 1
        print(ok / n)
def main():
    config = get_config()
    random.seed(config["algorithm"]["random_seed"])
    train_data, test_data = acq.extract_data(config)
    if config["verbose"]["enabled"]:
        acq.show_data(train_data[0], config["verbose"]["sample_timeout"])
    algo = classifier.Classifier(config["algorithm"]["random_seed"])
    algo.train(train_data[0], train_data[1])
    estimates = algo.test(test_data[0])
    algo.eval(estimates, test_data[1])
    show_code_in_action(test_data, algo)
    print("Finished algorithm.")
def _custom_classifier(features):
    clsf = classifier.Classifier(CLASSES, TRAINING_SAMPLES_LIMIT, features, 'avg')
    _train_classifier_from_data(clsf)
    clsf.finalize()
    stats = _test_custom_classifier(clsf)
    for cls in sorted(stats.keys()):
        util.debug("Class {} => {}% correct ({} total)".format(
            cls, round(stats[cls]['accuracy'] * 100, 1), stats[cls]['total']))
    # And some machine-readable output suitable for appending to a CSV for analysis
    feature_id = ','.join([d['name'] for d in features])
    print("decision_mode,features,class,total,accuracy")
    for cls in stats:
        print("{},\"{}\",{},{},{}".format(clsf.decision_mode, feature_id, cls,
                                          stats[cls]['total'], stats[cls]['accuracy']))
def main():
    makeSub = True
    featureImportance = False
    cvfold = True
    df = pd.read_csv('../data/cprobTrain15NA.csv')
    X, y = (np.array(pd.read_csv('../data/train.csv', usecols=range(1, 9))),
            np.array(pd.read_csv('../data/train.csv').ACTION))
    X = np.hstack((X, np.array(df)))
    params = {
        'max_depth': 4,
        'subsample': 0.5,
        'verbose': 0,
        'random_state': 1337,
        'min_samples_split': 10,
        'min_samples_leaf': 10,
        'max_features': 10,
        'n_estimators': 350,
        'learning_rate': 0.05
    }
    clf = GradientBoostingClassifier(**params)
    prefix = 'lib/gbm350d4m10c15'
    if cvfold:
        c = classifier.Classifier(X, y)
        c.validate(clf, nFolds=10, out=prefix + 'Train.csv')
    if makeSub:
        Xt = np.array(pd.read_csv('../data/test.csv', usecols=range(1, 9)))
        Xt = np.hstack((Xt, np.array(pd.read_csv('../data/cprobTest15NA.csv'))))
        clf.fit(X, y)
        y_ = clf.predict_proba(Xt)[:, 1]
        out = pd.read_csv('subs/nbBaseTest.csv')
        out.ACTION = y_
        out.to_csv(prefix + 'Test.csv', index=False)
    if featureImportance:
        print "Feature ranking:"
        importances = clf.feature_importances_
        indices = np.argsort(importances)[::-1]
        np.savetxt('indices.txt', indices, delimiter=',')
        for f in xrange(df.shape[1]):
            print "%d. feature (%s,%f)" % (f + 1, df.columns[indices[f]],
                                           importances[indices[f]])
def predict():
    """PREDICT THE POTENTIAL PIECE OF EQUIPMENT BASED ON THE REQUEST

    RECEIVING JSON FORMAT:
    {
        "body": {"imgsource": "<base64 value>"}
    }
    """
    # Extract information from the JSON
    bodyJson = request.get_json(force=True).get('body')
    data = bodyJson.get('imgsource')
    path = "./temp/test.png"
    # Decode the BASE64 image and save it into the temp file
    imgdata = base64.b64decode(str(data))
    image = Image.open(BytesIO(imgdata))
    img = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB)
    cv2.imwrite(path, img)
    r = requests.post(
        'https://api.remove.bg/v1.0/removebg',
        files={'image_file': open('./temp/test.png', 'rb')},
        data={'size': 'auto', 'bg_color': 'white'},
        headers={'X-Api-Key': 'iTUtE8hsnt76HMLmfjPAi2hp'},
    )
    if r.status_code == requests.codes.ok:
        with open('./temp/test.png', 'wb') as out:
            out.write(r.content)
    else:
        print("Error:", r.status_code, r.text)
    # Call the image recognition function
    predictor = classifier.Classifier()
    predicted_label = predictor.image_recognition('./temp/test.png')
    print(predicted_label)
    max_index = np.argmax(predicted_label[0])
    probability = str(float(predicted_label[0][max_index] * 100))
    data_response = {MAP.get(max_index): probability}
    response_json = json.dumps(data_response, indent=4)
    return Response(response=response_json, status=200)
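# Hedged client-side sketch for the handler above: the route path (/predict) and the
# port are assumptions not shown in this snippet; the JSON shape follows the docstring.
# import base64, requests
# with open("sample.png", "rb") as f:
#     payload = {"body": {"imgsource": base64.b64encode(f.read()).decode("utf-8")}}
# resp = requests.post("http://localhost:5000/predict", json=payload)
# print(resp.status_code, resp.text)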
def main(argv):
    help = 'main.py [-t -v -f --test]\n'
    help += '-t or --twitter : runs classifier within twitter GUI\n'
    help += '-v or --verbose : prints extra information\n'
    help += '-f [folds] or --folds=[folds] : set number of folds '
    help += 'for validation\n'
    help += '--test : runs test code\n'
    help += '--timing : runs timing code\n'
    try:
        opts, args = getopt.getopt(
            argv, "htcvf:",
            ['twitter', 'classifier', 'verbose', 'test', 'timing', 'folds='])
    except getopt.GetoptError:
        print(help)
        sys.exit(2)
    twitter = False
    verbose = False
    test = False
    timing = False
    folds = 5
    for opt, arg in opts:
        if opt == '-h':
            print(help)
            sys.exit()
        elif opt in ('-t', '--twitter'):
            twitter = True
        elif opt in ('-v', '--verbose'):
            verbose = True
        elif opt in ('--test',):
            test = True
        elif opt in ('-f', '--folds'):
            folds = int(arg)  # getopt yields strings, so convert explicitly
        elif opt in ('--timing',):
            timing = True
    if test:
        subprocess.call(['pytest', '..\\Test\\test_net.py'])
    else:
        clf = classifier.Classifier(folds=folds, timing=timing)
        if verbose:
            clf.run(verb=True)
        else:
            clf.run()
        if twitter:
            win = twitterGUI.TwitWindow(clf)
            win.CreateWindow()
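# Example invocations, taken from the help text above (the script name main.py is
# assumed from that same text):
#   python main.py -t -v        # run the classifier with the Twitter GUI, verbose output
#   python main.py -f 10        # use 10 folds for validation
#   python main.py --test       # run the pytest suite instead of the classifier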
def init_classifier(self):
    '''
    SVM initialization, stores the list of SVM objects in a list
    at instance level
    '''
    svms = []
    print '\nSVM initialization:'
    startt = time.time()
    t_lab, trn, v_lab, val = self.init_ds()
    for n in self.names:
        tmp = classifier.Classifier(self.get_lab(n, t_lab), trn,
                                    self.get_lab(n, v_lab), val, n)
        print '\tSVM for %s initialized' % n
        svms.append(tmp)
    print 'Classifier initialized in %s sec.\n' % (time.time() - startt)
    self.svms = svms
def __init__(self, classifier_folder):
    self.class_name = classifier_folder
    self.image_size = (227, 227)
    net_model = classifier_folder + "/net.caffemodel"
    net_proto = classifier_folder + "/deploy.prototxt"
    net_label = classifier_folder + "/labels.txt"
    self.classifier = classifier.Classifier(image_size=self.image_size,
                                            net=net_proto,
                                            weight=net_model,
                                            class_label=net_label)
    labels = self.classifier.get_label_list()
    super(UI, self).__init__()
    self.setGeometry(300, 300, 720, 720)
    self.setWindowTitle(self.class_name)
    self.w = dragarea()
    self.w.dropEvent = self.load_images_dd
    self.root_vbox = QtGui.QVBoxLayout()
    self.w.setLayout(self.root_vbox)
    self.setCentralWidget(self.w)
    self.detail_btn = QtGui.QPushButton("Details")
    self.detail_btn.clicked.connect(self.click_detail)
    self.detail_w = detail_panel(labels)
    self.detail_page = QtGui.QWidget()
    self.controller_hbox = QtGui.QHBoxLayout()
    self.detail_page.setLayout(self.controller_hbox)
    self.prev_image = QtGui.QPushButton(" < prev")
    self.prev_image.clicked.connect(self.prev_detail)
    self.next_image = QtGui.QPushButton(" next >")
    self.next_image.clicked.connect(self.next_detail)
    self.return_table = QtGui.QPushButton(" score table ")
    self.return_table.clicked.connect(self.display_table)
    self.controller_hbox.addWidget(self.prev_image)
    self.controller_hbox.addWidget(self.return_table)
    self.controller_hbox.addWidget(self.next_image)
    self.image_added = False
    self.image_acceptable = True
    self.detail_view = False
    self.detail_index = -1
    self.table = None
    self.images = []
    self.scores = []
    self.show()
def trainClassifer(third_dataset, learning_rate=0.0001, decay_rate=0.99,
                   batch_size=10, training_epochs=10, display_step=1,
                   n_samples=1000, noise=1):
    cl = classifier.Classifier(learning_rate=learning_rate)
    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = int(n_samples / batch_size)
        for i in range(total_batch):
            batch_xs = third_dataset[i * batch_size:(i + 1) * batch_size]
            batch_ys = train_labels[i * batch_size:(i + 1) * batch_size]
            cl.train_step.run({cl.x: batch_xs, cl.y_: batch_ys})
        # Display logs per epoch step
        if epoch % 10 == 0:
            print("Epoch:", '%04d' % (epoch + 1),
                  "cross_entropy=", "{:.9f}".format(
                      cl.cross_entropy.eval({cl.x: third_dataset,
                                             cl.y_: train_labels[0:n_samples]})))
        cl.learning_rate *= decay_rate
    # Test trained model
    print(cl.accuracy.eval({cl.x: third_dataset, cl.y_: train_labels[0:n_samples]}))
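# Hedged usage sketch: `third_dataset` and the module-level `train_labels` used inside
# the function are assumed to be arrays prepared elsewhere in the module.
# trainClassifer(third_dataset, learning_rate=1e-4, training_epochs=50,
#                n_samples=len(third_dataset))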
def HyperSearch():
    # Courtesy of Miroslaw Horbal
    base = [127, 96, 53, 3, 103, 71, 151, 1, 65, 152]
    f = fileio.Preprocessed('../data/quads10Threshold.csv')
    f.encode(base)
    train, truth = f.transformTrain(base)
    print "Performing hyperparameter selection..."
    clf = LogisticRegression(C=2.3, class_weight='auto')
    # Hyperparameter selection loop
    score_hist = []
    Cvals = np.linspace(1, 4, 32)
    eval_ = classifier.Classifier(train, truth)
    for C in Cvals:
        clf.C = C
        score = eval_.holdout(clf, nFolds=10, fraction=0.2)
        score_hist.append((score, C))
        print "C: %f Mean AUC: %f" % (C, score)
    bestC = sorted(score_hist)[-1][1]
    print "Best C value: %f" % (bestC)
def classify_posts(posts):
    with classifier.Classifier() as c:
        for post in posts:
            timestamp = post['taken_at_timestamp']
            picture_url = post['display_url']
            print(f"checking url {picture_url}")
            if queries.image_exists(picture_url):
                print(f"\talready processed. skipping {picture_url}")
                continue
            print(f"\timage not loaded, adding to db {picture_url}")
            image = Image(url=picture_url, post_date=timestamp)
            print(f"\tclassifying image {picture_url}")
            c.classify(image)
            queries.add(image)
        queries.commit()
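# Hedged usage sketch: classify_posts only reads the two keys shown below from each
# post dict; the values here are made up for illustration.
# classify_posts([
#     {"taken_at_timestamp": 1580515200,
#      "display_url": "https://example.com/p/abc123.jpg"},
# ])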
def classification_test(verbose=True):
    rdr = reader.Reader()
    clssfr = classifier.Classifier()
    test_data_tables = rdr.get_classifier_test_data_tables()
    correct_count = 0
    total_count = 0
    for data_table in test_data_tables:
        col_idx = 0
        while True:
            col = data_table.get_col(col_idx)
            if col is None:  # out of columns to read
                break
            (header, records) = col
            classified_type = clssfr.classify(col_idx, header, records)
            correct_type = data_table.get_type(col_idx)
            if verbose or classified_type != correct_type:
                print("Column    :", data_table.csv_file, "(Column " + str(col_idx) + ")")
                print("Classified:", classified_type)
                print("Correct   :", correct_type)
            if verbose and classified_type == correct_type:
                print()
            if classified_type == correct_type:
                correct_count += 1
            else:
                print("Header  :", repr(header))
                print("Records :", records)
                print()
            total_count += 1
            col_idx += 1
    print("===================================================")
    print("Overall result:", str(correct_count) + "/" + str(total_count),
          "(" + str(round(correct_count / total_count * 100, 2)) + "%)",
          "correct classifications")
def fleetclassify_status(status):
    # pp.pprint(status)
    image_url = get_image_url_from_status(status)
    print("image: " + str(image_url))
    if not image_url:
        return None
    image_filepath = download_image_file(image_url)
    print("image filepath: " + str(image_filepath))
    image_paths = [image_filepath]
    result_txt = ''
    classifier_ = classifier.Classifier(LEARNED_MODEL_FILEPATH)
    predictions = classifier_.classify(image_paths)
    for image_path, prediction in zip(image_paths, predictions):
        if prediction < 0.5:
            result_txt += "艦これ"
        else:
            result_txt += "アズールレーン"
        result_txt += ' (%.3f)' % (prediction)
    return result_txt
"--yolo", default='make_model_classifier/yolo-coco', help="base path to YOLO directory") ap.add_argument("-c", "--confidence", type=float, default=0.5, help="minimum probability to filter weak detections") ap.add_argument("-t", "--threshold", type=float, default=0.3, help="threshold when applying non-maxima suppression") args = vars(ap.parse_args()) car_color_classifier = classifier.Classifier() # load the COCO class labels our YOLO model was trained on labelsPath = os.path.sep.join([args["yolo"], "coco.names"]) LABELS = open(labelsPath).read().strip().split("\n") # initialize a list of colors to represent each possible class label np.random.seed(42) COLORS = np.random.randint(0, 255, size=(len(LABELS), 3), dtype="uint8") # derive the paths to the YOLO weights and model configuration weightsPath = os.path.sep.join([args["yolo"], "yolov3.weights"]) configPath = os.path.sep.join([args["yolo"], "yolov3.cfg"]) # load our YOLO object detector trained on COCO dataset (80 classes) # print("[INFO] loading YOLO from disk...")
pcad_index = int(list(sys.argv)[1])
window_sizes = [None, 5, 10, 50, 100, 200]
window_size = window_sizes[pcad_index % 6]
if pcad_index > 5:
    baselined = True
    output_txt_dir = os.path.join(
        out_dir,
        'increasing_ordered_PCA_comps_ws_%s_baselined.txt' % str(window_size))
else:
    baselined = False
    output_txt_dir = os.path.join(
        out_dir, 'increasing_ordered_PCA_comps_ws_%s.txt' % str(window_size))

test_class = cl.Classifier()
test_class.recordings = recordings
test_class.pre_trial_window = 2
test_class.post_trial_window = 2
test_class.make_unit_response(['20Hz_cor_AB', '20Hz_acor_BA', '20Hz_acor_AB'],
                              baseline=baselined)
test_class.test_size = 0.2
n_components = 599
pcad_response, y_var = test_class.make_pcad_response(
    n_components, ['20Hz_cor_AB', '20Hz_acor_AB', '20Hz_acor_BA'],
    reassign_y_var=[['20Hz_acor_AB', '20Hz_acor_BA']],
    window_size=window_size)

with open(output_txt_dir, 'a') as f:
    all_accs = []
def predict_aspect_file(self, inputFilePath, outputFilePath, mindmap, language):
    config_path = 'sentiment_config.ini'
    import pickle
    # Load the pickled English classifier into its own name so that it does not
    # shadow the `classifier` module used just below.
    english_classifier = pickle.load(open("english_classifier.pickle", 'rb'),
                                     fix_imports=True, encoding="latin1")
    try:
        clf = classifier.Classifier(config_path=config_path, language=language)
    except:
        self.messageBoxSignal.emit("Error", "Failed to load the classifier file")
        self.clear()
        return
    # analyzer = aspect_detector.AspectDetector()
    asa = aspect_sentiment_analyzer.AspectSentimentAnalyzer()
    try:
        data, fieldName = utils.read_excel_dict(inputFilePath)
        # data.columns = map(str.lower, data.columns)
    except:
        self.messageBoxSignal.emit("Error", "Failed to read the input file")
        self.clear()
        return
    full_df = pd.DataFrame()
    '''
    try:
        verbatim = str(data[0]['verbatim'])
    except:
        self.messageBoxSignal.emit("Error", "There is no 'verbatim' column in the input file")
        self.clear()
        return
    try:
        verbatim = str(data[0]['date'])
    except:
        self.messageBoxSignal.emit("Error", "There is no 'date' column in the input file")
        self.clear()
        return
    '''
    progress_step = max(1, int(len(data) / 80))  # avoid modulo-by-zero on small files
    c = 0
    step = 20
    for row in data:
        verbatim = str(row['verbatim'])
        cleaned_verbatim = clf.clean_verbatim(verbatim)
        # Returns a dictionary keyed by sentence, whose values are tuples of
        # (category, subcategory)
        predicted_categories, words_in_mindmap_per_sentence = asa.predict_verbatim_aspect(
            verbatim.lower(), mindmap)
        rest_data_frame_columns = pd.DataFrame(row, index=[0])
        rest_data_frame_columns = rest_data_frame_columns.loc[
            :, rest_data_frame_columns.columns != 'verbatim']
        words_in_mindmap_per_verbatim = utils.change_listoflist_to_list(
            words_in_mindmap_per_sentence)
        words_in_mindmap_per_verbatim = ','.join(words_in_mindmap_per_verbatim)
        # words_in_mindmap_per_sentence = utils.change_listoflist_to_listofstrings(words_in_mindmap_per_sentence)
        # Get the month from the 'date' column
        if '/' in str(row['date']):
            date = str(row['date']).split("/")
        elif '-' in str(row['date']):
            date = str(row['date']).split("-")
        else:
            self.messageBoxSignal.emit(
                "Error",
                "Please check date format in the file, it should have '/' or '-' separator")
            self.clear()
            return
        month = calendar.month_name[int(date[1])]
        # mindmap_index = 0
        try:
            sentiment, cleaned_sentence = clf.predict_sentence(cleaned_verbatim)
        except:
            self.messageBoxSignal.emit("Error", "Failed to predict sentiment")
            self.clear()
            return
        if len(predicted_categories) > 0:
            for sentence in predicted_categories:
                category_subcategory = predicted_categories[sentence]
                # Check if, after cleaning, there is still any word left in the sentence
                rest_data_frame_columns['month'] = month
                rest_data_frame_columns['original verbatim'] = verbatim
                rest_data_frame_columns['cleaned verbatim'] = cleaned_verbatim
                rest_data_frame_columns['sentiment'] = sentiment
                for category, subcategory in category_subcategory:
                    rest_data_frame_columns['category'] = category
                    rest_data_frame_columns['subcategory'] = subcategory
                    # rest_data_frame_columns['words in mindmap'] = words_in_mindmap_per_verbatim + '| ' + words_in_mindmap_per_sentence[mindmap_index]
                    rest_data_frame_columns['words in mindmap'] = words_in_mindmap_per_verbatim
                    full_df = pd.concat([full_df, rest_data_frame_columns],
                                        sort=False, axis=0)
        elif cleaned_verbatim != '':
            rest_data_frame_columns['month'] = month
            rest_data_frame_columns['original verbatim'] = verbatim
            rest_data_frame_columns['cleaned verbatim'] = cleaned_verbatim
            rest_data_frame_columns['sentiment'] = sentiment
            rest_data_frame_columns['category'] = "Not Defined"
            rest_data_frame_columns['subcategory'] = "Not Defined"
            rest_data_frame_columns['words in mindmap'] = "No Words"
            full_df = pd.concat([full_df, rest_data_frame_columns],
                                sort=False, axis=0)
        # mindmap_index += 1
        c += 1
        if c % progress_step == 0:
            step += 1
            if step <= 99:
                self.countChanged.emit(step)
    try:
        self.countChanged.emit(100)
        full_df.drop_duplicates(inplace=True)
        full_df.to_csv(outputFilePath, encoding='utf-8', index=False)
        self.messageBoxSignal.emit("Done", "The file was written successfully")
        self.clear()
    except:
        self.messageBoxSignal.emit("Error", "Failed to write the file correctly")
        self.clear()
        return
if args.study:
    # Conduct a study with n_trials trials for the selected model type
    study = paramstudy.conduct_study(args.n_trials, args.model)
    if args.plot:
        # Functionality for loading a study dump was dropped in the last build since it was
        # not a necessity and was only used during development, but it can be restored by
        # uncommenting the next line.
        # study = joblib.load("Results/Study/convpool_study_6.pkl")
        paramstudy.generate_graphs_from_study(study)
    exit()

# Load the cfg for the selected model
if args.model == "capsnet":
    cfg = variables.capsnet_cfg
elif args.model == "convpool":
    cfg = variables.convpool_cfg

# Initialize the model
TClassifier = classifier.Classifier(cfg)

# If selected, load the weights from file
if args.load_weights is not None:
    TClassifier.load_weights(variables.saved_weights_path + args.load_weights)
    print("Weights loaded.")

# If train is selected, train the model
if args.train:
    TClassifier.load_images()
    TClassifier.train(cfg["epochs"], cfg["mini_batch_size"], cfg["test_batch_size"])
    # Plot the results of the training if required
    if args.plot:
        TClassifier.plot_loss()
        TClassifier.plot_accuracy()
        TClassifier.plot_test_accuracy()

# Evaluate the model
n_steps = 100  # number of steps in game
C = 1.0  # classifier parameter
norm_trsh = 0.001
train_dataset, train_labels, valid_dataset, valid_labels, test_dataset, test_labels = dp.read_data()
m = len(train_dataset[0])
print('data read complete')
steps = np.arange(n_initial, len(train_labels),
                  int(np.ceil((len(train_labels) - n_initial) / n_steps)))
svm1 = svm.SVC(kernel='linear', C=C).fit(train_dataset, train_labels)
err_best = 1 - accuracy_score(test_labels, svm1.predict(test_dataset))
print('best possible performance on test dataset is ', 1 - err_best)

classifiers = []
classifier1 = cl.Classifier(train_dataset[:n_initial, :], train_labels[:n_initial],
                            valid_dataset, valid_labels, C, 8.2)
err_test1 = 1 - accuracy_score(test_labels, classifier1.predict(test_dataset))
print('classifier1: initiated, error on test dataset is ', err_test1)
classifiers.append(classifier1)

classifier2 = cl.Classifier(train_dataset[:n_initial, :], train_labels[:n_initial],
                            valid_dataset, valid_labels, C, 8, 'asc')
err_test2 = 1 - accuracy_score(test_labels, classifier2.predict(test_dataset))
print('classifier2: initiated, error on test dataset is ', err_test2)
classifiers.append(classifier2)

classifier3 = cl.Classifier(train_dataset[:n_initial, :], train_labels[:n_initial],
                            valid_dataset, valid_labels, C, 12, 'desc')
import base64
import json
import os
import tempfile

import classifier
import picto_matcher
import web

urls = ('/post_capture', 'PostCapture')
db_file = './../synsets/database/synset.sqlite3'
db = web.database(dbn='sqlite', db=db_file)
cl = classifier.Classifier()
app = web.application(urls, globals())


class PostCapture(object):
    def POST(self):
        data = web.input()
        wnid = data["wnid"]
        imagestring = data["captured_picto"]
        fd, path = tempfile.mkstemp(suffix=".jpeg", prefix='pc_')
# -------Samples of the training set to reduce computational time-------#
# n_train_s = 80000
# X_train_s, y_train_s = sampling(X_train, y_train, n_train_s, balanced=False)
# text += '\n-sampled train = True (%s samples)' % n_train_s
# ----------------
# print 'classRatio = ', float(len(y_train[y_train == 1]))/len(y_train[y_train == 0])

#######################################################################################################################
#                                             Train and predict                                                      #
#######################################################################################################################
print '--Training \n'
clf = classifier.Classifier(verbose=True)
start = time.time()
# clf.fit(X_train_s, y_train_s)
clf.fit(X_train, y_train)
# weight = train_mrf(y_pred_svm_train, img_train, nb_class, max_map_iter, [alpha, beta, sigma_blur], threshold_learning, y_train, threshold_sensitivity, threshold_error)
training_time = time.time() - start  # was time.clock(), which does not match the time.time() start above

print '\n--Prediction \n'
start = time.time()
y_pred = parallized_pred(X_test, clf)
y_pred_train = parallized_pred(X_train, clf)
prediction_time = time.time() - start

weight = train_mrf(y_pred_train, img_train, nb_class, max_map_iter,
params = {
    'learning_rate': 0.05,
    'n_estimators': 100,
    'max_depth': 4,
    'subsample': 0.5,
    'n_jobs': 4,
    'min_child_weight': 15
}
train_params = {'early_stopping_rounds': 10, 'verbose': 0}
xgb = XGBClassifier(**params)
xgb.set_params(**train_params)
xgb_kin = clf.Classifier(model=xgb, cv=skf, variables=var_kin,
                         model_name='XGBoost', var_name='kinetic',
                         fig_name='xgb', train_params=train_params)
xgb_kin.fit(train)
xgb_kin.check_ks_and_cvm(train, check_agreement=check_agreement,
                         check_correlation=check_correlation)
xgb_kin.predict(data=test)

params = {
    'learning_rate': 0.05,
    'n_estimators': 200,
    'max_depth': 4,
    'subsample': 0.5,
    'n_jobs': 4,
    'min_child_weight': 15
def classify_context(self, raw_text=None, type="string"):
    cl_o = cl.Classifier()
    return cl_o.classifier_handler(raw_text)
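# Hedged usage sketch: the owning class is not shown in this snippet, so `SomeOwner`
# below is a hypothetical stand-in for whatever class defines classify_context.
# ctx = SomeOwner().classify_context(raw_text="example sentence to classify")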
else:
    # ######### generate image features and classification evaluation #########
    generator = Generator(latent_dim, class_embed_dim, feature_dim)
    if torch.cuda.is_available():
        generator = generator.cuda()
    checkpoint = torch.load(Model_GAN_path)
    print("===> Loading Wasserstein_GAN Model... Start Epoch:{}".format(checkpoint['epoch']))
    generator.load_state_dict(checkpoint['G_state_dict'])
    if validation:
        # for fake data generation
        unique_attributes_val, unique_labels_val = get_unique_vector(data.attributes_val, data.labels_val)
        gen_features, gen_labels = generate_img_feature(generator, unique_attributes_val, unique_labels_val)
        gen_labels = map_label(gen_labels.astype(int))
        cls = classifier.Classifier(gen_features, gen_labels, data, zsl_classifier_path,
                                    lr=0.0001, batch_size=64, epoch=100,
                                    validation=True, generalized=False)
        unseen_acc = cls.unseen_acc
    else:
        if generalized:
            # unique_attributes_trainval, unique_labels_trainval = get_unique_vector(data.attributes_train, data.labels_train)
            # gen_features_trainval, gen_labels_trainval = generate_img_feature(generator, unique_attributes_trainval, unique_labels_trainval)
            unique_attributes_test_unseen, unique_labels_test_unseen = get_unique_vector(
                data.attributes_test_unseen, data.labels_test_unseen)
            gen_features_test_unseen, gen_labels_test_unseen = generate_img_feature(
                generator, unique_attributes_test_unseen, unique_labels_test_unseen)
            features_train = np.concatenate((data.features_train, gen_features_test_unseen), axis=0)
            labels_train = np.concatenate((data.labels_train, gen_labels_test_unseen), axis=0)
            labels_train = np.where(labels_train.astype(int) == 1)[1]
            cls = classifier.Classifier(features_train, labels_train, data, gzsl_classifier_path,
                                        lr=0.0001, batch_size=64, epoch=100,
                                        validation=False, generalized=True)
            best_H, seen_acc, unseen_acc = cls.best_H, cls.seen_acc, cls.unseen_acc
        else:
            unique_attributes_test_unseen, unique_labels_test_unseen = get_unique_vector(
                data.attributes_test_unseen, data.labels_test_unseen)
import pandas as pd
from sklearn.cross_validation import StratifiedShuffleSplit
import classifier
from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import accuracy_score, roc_curve, auc

target_column_name = 'TARGET'
# point it to your training file
filename = '../data/public/train.csv'

if __name__ == '__main__':
    df = pd.read_csv(filename)
    y = df[target_column_name].values
    X = df.drop(target_column_name, axis=1).values
    skf = StratifiedShuffleSplit(y, n_iter=2, test_size=0.5, random_state=57)
    for valid_train_is, valid_test_is in skf:
        X_valid_train = X[valid_train_is]
        y_valid_train = y[valid_train_is]
        X_valid_test = X[valid_test_is]
        y_valid_test = y[valid_test_is]
        # clf = model.Classifier()
        # clf_c = CalibratedClassifierCV(clf, cv=2, method='isotonic')
        clf_c = classifier.Classifier()
        clf_c.fit(X_valid_train, y_valid_train)
        y_valid_pred = clf_c.predict(X_valid_test)
        y_valid_proba = clf_c.predict_proba(X_valid_test)
        fpr, tpr, _ = roc_curve(y_valid_test, y_valid_proba[:, 1])
        print 'auc = ', auc(fpr, tpr)
        print 'accuracy = ', accuracy_score(y_valid_pred, y_valid_test)
def get_news(sources=['spiegel', 'faz', 'welt', 'zeit']):
    '''
    Collects all news articles from the politics ressort of major German newspapers.
    Articles are transformed to BoW vectors and assigned to a political party.
    For better visualization, articles' BoW vectors are also clustered into topics.

    INPUT
    sources    a list of strings, one per newspaper for which a crawl is implemented,
               default ['spiegel','faz','welt','zeit']
    '''
    import classifier
    from bs4 import BeautifulSoup
    from api import fetch_url
    import urllib2
    import json

    articles = []
    # the classifier for prediction of political attributes
    clf = classifier.Classifier(train=False)

    for source in sources:
        if source == 'spiegel':
            # fetching articles from spiegel.de/politik
            url = 'http://www.spiegel.de/politik'
            site = BeautifulSoup(urllib2.urlopen(url).read())
            titles = site.findAll("div", {"class": "teaser"})
            urls = ['http://www.spiegel.de' + a.findNext('a')['href'] for a in titles]
        if source == 'faz':
            # fetching articles from faz.net/aktuell/politik
            url = 'http://www.faz.net/aktuell/politik'
            site = BeautifulSoup(urllib2.urlopen(url).read())
            titles = site.findAll("a", {"class": "TeaserHeadLink"})
            urls = ['http://www.faz.net' + a['href'] for a in titles]
        if source == 'welt':
            # fetching articles from welt.de/politik
            url = 'http://www.welt.de/politik'
            site = BeautifulSoup(urllib2.urlopen(url).read())
            titles = site.findAll("a", {"class": "as_teaser-kicker"})
            urls = [a['href'] for a in titles]
        if source == 'sz-without-readability':
            # fetching articles from sueddeutsche.de/politik
            url = 'http://www.sueddeutsche.de/politik'
            site = BeautifulSoup(urllib2.urlopen(url).read())
            titles = site.findAll("div", {"class": "teaser"})
            urls = [a.findNext('a')['href'] for a in titles]
        if source == 'zeit':
            # fetching articles from zeit.de/politik
            url = 'http://www.zeit.de/politik'
            site = BeautifulSoup(urllib2.urlopen(url).read())
            urls = [a['href'] for a in site.findAll("a", {"class": "teaser-small__combined-link"})]

        print "Found %d articles on %s" % (len(urls), url)

        # predict party from url for this source
        print "Predicting %s" % source
        for url in urls:
            try:
                title, text = fetch_url(url)
                prediction = clf.predict(text)
                prediction['url'] = url
                prediction['source'] = source
                articles.append((title, prediction))
            except:
                print('Could not get text from %s' % url)
                pass

    # do some topic modeling
    topics = kpca_cluster(map(lambda x: x[1]['text'][0], articles))

    # remove original article text for faster web-frontend
    for a in articles:
        a[1]['text'] = 'deleted'

    # store current news and topics
    json.dump(articles, open('news.json', 'wb'))
    json.dump(topics, open('topics.json', 'wb'))
import labeler
import classifier

l1 = labeler.Labeler()
list1 = l1.extractAndPrint(
    "http://money.cnn.com/2018/03/21/technology/mark-zuckerberg-cambridge-analytica-response/index.html")
print(list1)
print("\n")

l2 = labeler.Labeler()
list2 = l2.extractAndPrint(
    "https://www.nytimes.com/2018/03/27/world/europe/whistle-blower-data-mining-cambridge-analytica.html")
print(list2)
print("\n")

c1 = classifier.Classifier()
c1.readAndCompare(list1, list2)
    # roc_auc = auc(fpr, tpr)
    # mean_tpr /= len(cv)
    # mean_tpr[-1] = 1.0
    mean_auc = auc(mean_fpr, mean_tpr)
    plt.plot(mean_fpr, mean_tpr, 'k--',
             label='Mean ROC (area = %0.2f) for diseases %d' % (mean_auc, i), lw=2)
    # plt.legend(loc="lower right")
    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC and AUC for 3 diseases')
    plt.show()


if __name__ == '__main__':
    nb_clf = clf.Classifier('NB')
    tree_clf = clf.Classifier('Tree')
    nb_y_hat = cro_vld(nb_clf, 10)
    tree_y_hat = cro_vld(tree_clf, 10)
    plot_auc(nb_clf.y, nb_y_hat)
    plot_auc(tree_clf.y, tree_y_hat)
print('--------------------------')
X_train_df = X_df.iloc[train_is].copy()
y_train_df = y_df.iloc[train_is].copy()
X_test_df = X_df.iloc[test_is].copy()
y_test_df = y_df.iloc[test_is].copy()
y_train_clf = y_train_df['molecule'].values
y_train_reg = y_train_df['concentration'].values
y_test_clf = y_test_df['molecule'].values
y_test_reg = y_test_df['concentration'].values

fe_clf = feature_extractor_clf.FeatureExtractorClf()
fe_clf.fit(X_train_df, y_train_clf)
X_train_array_clf = fe_clf.transform(X_train_df)
X_test_array_clf = fe_clf.transform(X_test_df)

clf = classifier.Classifier()
clf.fit(X_train_array_clf, y_train_clf)
y_proba_clf = clf.predict_proba(X_test_array_clf)
y_pred_clf = labels[np.argmax(y_proba_clf, axis=1)]
error = 1 - accuracy_score(y_test_clf, y_pred_clf)
print('error = %s' % error)

fe_reg = feature_extractor_reg.FeatureExtractorReg()
for i, label in enumerate(labels):
    X_train_df.loc[:, label] = (y_train_df['molecule'] == label)
    X_test_df.loc[:, label] = y_proba_clf[:, i]
fe_reg.fit(X_train_df, y_train_reg)
X_train_array_reg = fe_reg.transform(X_train_df)
X_test_array_reg = fe_reg.transform(X_test_df)
reg = regressor.Regressor()
def callback():
    # Auth Step 4: Requests refresh and access tokens
    access_token = request.args['access_token']

    # Auth Step 6: Use the access token to access Spotify API
    authorization_header = {"Authorization": "Bearer {}".format(access_token)}
    params = {"limit": "50"}

    # Get profile data
    user_profile_api_endpoint = "{}/me/top/artists".format(SPOTIFY_API_URL)
    profile_response = requests.get(user_profile_api_endpoint,
                                    params=params, headers=authorization_header)
    profile_data = json.loads(profile_response.text)
    print(profile_data)

    # Combine profile and playlist data to display
    display_arr = [profile_data]

    list_of_genres = [a['genres'] for a in profile_data['items']]
    BB = []
    CC = []
    for ll in list_of_genres:
        LL = list(set(ll))
        for kk in LL:
            BB.append(kk.split())
        CC.append(BB)
        BB = []
    flat_genres = [item2 for sublist in CC for item in sublist for item2 in item]

    numOfGenres = len(genres)
    mean_genre = [0] * numOfGenres
    for genre in flat_genres:
        genre = clean_genre(genre)
        try:
            idx = genres.index(genre)
        except ValueError:
            continue
        mean_genre[idx] += 1

    normalizer = sum(mean_genre)
    if normalizer == 0:
        normalizer = 1
    for i in range(0, len(mean_genre)):
        mean_genre[i] = mean_genre[i] / normalizer

    clf = classifier.Classifier(genres, k)
    maxScores, pred = clf.classify(mean_genre, X, Y)

    result = []
    for j in range(0, 20):
        for i in range(0, k):
            for track in Y[1][pred[1][0][i]]:
                if track['id'] == maxScores[j][0]:
                    # print(track['permalink_url'])
                    # print(maxScores[j][1])
                    # print(track['id'])
                    # print(track['genre'])
                    result.append(track['stream_url'])
                    break
    return {"data": result}