def __init__(self, fname, *args, **kargs):
    Classifier.__init__(self, fname, *args, **kargs)
    # sometimes a threshold value is trained during Bayesian
    # classification to avoid classifying too many 'documents' as
    # one kind or the other
    self.thresholds = [1.0, 1.0]
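# A minimal illustration (not part of the project above) of how such per-class
# thresholds are typically used to gate a Bayesian decision: the winning class
# must beat every other class by its threshold factor, otherwise a default is
# returned. The names below (classify_with_threshold, the category scores) are
# assumptions for this sketch, not the author's API.
def classify_with_threshold(scores, thresholds, default='unknown'):
    best = max(scores, key=scores.get)
    for cat, score in scores.items():
        if cat == best:
            continue
        # another category comes too close once the threshold is applied
        if score * thresholds.get(best, 1.0) > scores[best]:
            return default
    return best


print(classify_with_threshold({'spam': 0.6, 'ham': 0.4}, {'spam': 2.0}))  # -> 'unknown'
print(classify_with_threshold({'spam': 0.9, 'ham': 0.1}, {'spam': 2.0}))  # -> 'spam'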
def main():
    parser = argparse.ArgumentParser(description='Music classifier.\nReads the training data from a file and uses evolutionary algorithms to create and improve the classification rules.')
    parser.add_argument('-d', '--data', help='File containing the source data for the classifier.')
    args = vars(parser.parse_args())
    """
    The default values are:
    discretization size       - 100
    generation population     - 10
    min fitness to finish     - 0.9
    number to select          - 4
    mutation rate             - 0.05
    maximum generations       - 10000
    selection type            - ROULETTE_WHEEL_SELECTION
    """
    defaults = [100, 10, 0.9, 4, 0.05, 10000, selection.ROULETTE_WHEEL_SELECTION]
    classifier = Classifier(args['data'], discrete_intervals=defaults[0], size_rule_generation=defaults[1],
                            filter_list=["skewness", "spectral_rolloff", "energy", "sv", "spread", "centroid", "obsi", "kurtosis"],
                            log_results=True)
    start = time.clock()
    best_results = classifier.train(req_min_fitness=defaults[2], gen_select=defaults[3],
                                    mutation_prob=defaults[4], limit_generations=defaults[5])
    duration = (time.clock() - start) * 1000
    print "Duration\t", duration, "ms"
    print "Training ended."
    print "Best results:", ', '.join([str(key) + " fitness: " + str(value['fitness']) for key, value in best_results.items()])
    print "Testing:"
    classifier.test()
    print "Testing ended."
def main():
    me = Classifier()
    feature_counter = Counter()
    feature_set = pickle.load(open('validation_set.pkl', 'rb'))
    feature_set_labels = []
    for tweet, rating in feature_set:
        print rating
        try:
            float(rating)
        except:
            continue
        if float(rating) > 0:
            label = 'positive'
        elif float(rating) < 0:
            label = 'negative'
        else:
            label = 'neutral'
        feature_set_labels.append((tweet, label))
    feature_list = chain.from_iterable([word_tokenize(process_tweet(tweet)) for tweet, sentiment in feature_set_labels])
    for feat in feature_list:
        feature_counter[feat] += 1
    me.feature_list = [feat for feat, count in feature_counter.most_common(1000)]
    ts = [(me.extract_features(tweet), label) for tweet, label in feature_set]
    print 'training Maxent'
    me.classifier = MaxentClassifier.train(ts)
    return me
def main():
    dbinfo = recover()
    conn = MySQLdb.connect(**dbinfo)
    cur = conn.cursor()

    # Learn
    sql = "SELECT id,article_text,trainpos,trainneg,trainneutral FROM articles WHERE trainset=1 AND (trainpos>0 OR trainneg>0 OR trainneutral>0)"
    cur.execute(sql)
    a = Learner()
    for aid, article_text, trainpos, trainneg, trainneutral in cur.fetchall():
        aid = int(aid)
        items = [(1, int(trainpos)), (0, int(trainneutral)), (-1, int(trainneg))]
        classification = max(items, key=lambda x: x[1])[0]
        a.add_string(article_text, classification)
    a.train()

    # Predict
    sql = "SELECT id,article_text FROM articles"
    cur.execute(sql)
    b = Classifier(a)
    for aid, article_text in cur.fetchall():
        aid = int(aid)
        classification = b.classify(article_text)
        sql = "UPDATE articles SET score=%s WHERE id=%s"
        args = [classification, aid]
        cur.execute(sql, args)
        print aid, classification
    conn.commit()
def eval_classifier(classifierToUse, featuresToUse, testOrTrain="train"):
    print("Chosen feature: {0}".format(featuresToUse))
    print("Chosen classifier: {0}".format(classifierToUse))

    fe = FeatureExtractor(featuresToUse)
    dataset = DataSet(fe)
    classifier = Classifier()
    evaluate = Evaluation()

    print "test or Train %s" % testOrTrain
    for feature_class, files in getTestData(testOrTrain).items():
        print "%s" % testOrTrain
        for f in files:
            dataset.addFile(feature_class, f)
    print "Dataset initialized"
    print_class_stats(dataset.classes)

    print "Test set created."
    a_train, a_test, c_train, c_test = train_test_split(dataset.featureVector, dataset.classes, test_size=0.9)
    c_pred = classifier.classification(a_train, a_test, c_train, c_test, classifierToUse)
    evaluate.evaluate(c_pred, c_test, featuresToUse, classifierToUse)
def create_predict(HudongItem_csv):
    # Read the labeled items from neo4j
    db = Neo4j()
    db.connectDB()
    data_set = db.getLabeledHudongItem('labels.txt')

    classifier = Classifier('wiki.zh.bin')
    classifier.load_trainSet(data_set)
    classifier.set_parameter(weight=[1.0, 3.0, 0.2, 4.0, 0], k=10)

    predict_List = readCSVbyColumn(HudongItem_csv, 'title')
    file_object = open('predict_labels2.txt', 'a')

    count = 0
    vis = set()
    for p in predict_List:
        cur = HudongItem(db.matchHudongItembyTitle(p))
        count += 1
        title = cur.title
        if title in vis:
            continue
        vis.add(title)
        label = classifier.KNN_predict(cur)
        print(str(title) + " " + str(label) + ": " + str(count) + "/" + str(len(predict_List)))
        file_object.write(str(title) + " " + str(label) + "\n")

    file_object.close()

# create_predict('hudong_pedia2.csv')
def __init__(self, D, H, W, K, iternum):
    Classifier.__init__(self, D, H, W, K, iternum)
    self.L = 100  # size of hidden layer

    """ Layer 1 Parameters """
    # weight matrix: [M * L]
    self.A1 = 0.01 * np.random.randn(self.M, self.L)
    # bias: [1 * L]
    self.b1 = np.zeros((1, self.L))

    """ Layer 3 Parameters """
    # weight matrix: [L * K]
    self.A3 = 0.01 * np.random.randn(self.L, K)
    # bias: [1 * K]
    self.b3 = np.zeros((1, K))

    """ Hyperparams """
    # learning rate
    self.rho = 1e-2
    # momentum
    self.mu = 0.9
    # reg strength
    self.lam = 0.1
    # velocity for A1: [M * L]
    self.v1 = np.zeros((self.M, self.L))
    # velocity for A3: [L * K]
    self.v3 = np.zeros((self.L, K))
    return
def runNeuralNetwork(train, test, batchSize, classNum, hLayer=None, mode=None, momentumFactor=0.0):
    """ A function that calls the classifier to train a learning model.
    Args:
        train: training examples (numpy)
        test: testing examples (numpy)
        batchSize: the number of training examples for each iteration
        classNum: the number of classes
        hLayer: number of hidden layer nodes (list)
        mode: weight initializing mode
        momentumFactor: momentum factor
    """
    print ""
    print "Neural Network =============================="
    print " - number of hidden layer nodes:",
    if hLayer is not None:
        print hLayer
    else:
        print " default (one hidden layer with node number = 2 * feature number)"
    print " - weight initialization mode:",
    if mode is not None:
        print mode
    else:
        print "default"
    print " - momentum factor", momentumFactor
    nn = Classifier("neural_network", hidden_layer=hLayer, weightInitMode=mode, momentumFactor=momentumFactor)
    nn.train(train, test, classNum, batchSize)
    nn.test(test, "test")
def build_model_mnist():
    # CNN
    filter_size = (5, 5)
    activation = Rectifier().apply
    pooling_size = (2, 2)
    num_filters = 50
    layer0 = ConvolutionalLayer(activation=activation, filter_size=filter_size,
                                num_filters=num_filters, pooling_size=pooling_size,
                                weights_init=Uniform(width=0.1),
                                biases_init=Uniform(width=0.01), name="layer_0")

    filter_size = (3, 3)
    activation = Rectifier().apply
    num_filters = 20
    layer1 = ConvolutionalLayer(activation=activation, filter_size=filter_size,
                                num_filters=num_filters, pooling_size=pooling_size,
                                weights_init=Uniform(width=0.1),
                                biases_init=Uniform(width=0.01), name="layer_1")

    conv_layers = [layer0, layer1]
    convnet = ConvolutionalSequence(conv_layers, num_channels=1, image_size=(28, 28))
    convnet.initialize()

    output_dim = np.prod(convnet.get_dim('output'))
    mlp = MLP(activations=[Identity()], dims=[output_dim, 10],
              weights_init=Uniform(width=0.1), biases_init=Uniform(width=0.01),
              name="layer_2")
    mlp.initialize()

    classifier = Classifier(convnet, mlp)
    classifier.initialize()
    return classifier
def average_multiple_runs(num_runs, options, args):
    for num, option in enumerate(options):
        print "Running", num_runs, "iterations with options:", option
        list_best_results = []
        list_test_results = []
        list_correct_results = []
        for i in range(num_runs):
            print "Running #" + str(i + 1)
            classifier = Classifier(args['data'], discrete_intervals=option[0], size_rule_generation=option[1],
                                    filter_list=["skewness", "spectral_rolloff", "energy", "sv", "spread", "centroid", "obsi", "kurtosis"],
                                    log_results=False)
            best_results = classifier.train(req_min_fitness=option[2], gen_select=option[3], mutation_prob=option[4],
                                            limit_generations=option[5], selection_type=option[6])
            test_results, correct_results = classifier.test()
            list_best_results.append(best_results)
            list_test_results.append(test_results)
            list_correct_results.append(correct_results)

        print "Results for option: ", option
        print "run\ttype\tgen\tfitness"
        for i, results in enumerate(list_best_results):
            for rule, result in results.items():
                print str(i + 1) + "\t" + rule[:7] + "\t" + str(result["generation"]) + "\t" + str(result["fitness"])

        print "run\ttype\tavg correct rules"
        for i, results in enumerate(list_test_results):
            for avg_map in results:
                print str(i + 1) + "\t" + avg_map.keys()[0][:7] + "\t" + str(avg_map[avg_map.keys()[0]])

        print "run\ttype\tavg correct results"
        for i, results in enumerate(list_correct_results):
            for avg_map in results:
                print str(i + 1) + "\t" + avg_map.keys()[0][:7] + "\t" + str(avg_map[avg_map.keys()[0]])
def run(self):
    """
    Function: Run
    -------------
    This function will evaluate your solution! You do not need to write
    any code in this file, however you SHOULD understand this function!
    """
    print "Running the full pipeline!"
    K = 25
    trainImages = util.loadTrainImages()[:1000]
    testImages = util.loadTestImages()
    classifier = Classifier()

    print 'Training..........'
    classifier.train(trainImages, K)
    trainPredictions = classifier.test(trainImages)
    trainAccuracy = self.evaluate(trainPredictions, trainImages)

    print 'Testing...........'
    testPredictions = classifier.test(testImages)
    testAccuracy = self.evaluate(testPredictions, testImages)

    print 'All done. Here is your summary:'
    self.reportAccuracy(trainAccuracy, 'Train Accuracy')
    self.reportAccuracy(testAccuracy, 'Test Accuracy')
def GetNewArticles(request):
    # Get the articles from RSS
    # aggregator = NewsAggregator()
    # list_of_articles = aggregator.feedreader()
    classifier = Classifier("filename.pkl")
    # Predict
    list_of_classes = []
    # with open("articles_dump", "wb") as dump:
    #     pickle.dump(list_of_articles, dump, pickle.HIGHEST_PROTOCOL)
    with open("articles_dump") as dump:
        list_of_articles = pickle.load(dump)
    for article in list_of_articles:
        list_of_classes.append(article["content"])
    # print list_of_classes
    res = classifier.predict(np.asarray(list_of_classes))
    for i in range(0, len(list_of_articles)):
        if res[i] == 1:
            cat = "Sports"
        elif res[i] == 2:
            cat = "Economy_business_finance"
        elif res[i] == 3:
            cat = "Science_technology"
        else:
            cat = "Lifestyle_leisure"
        element = list_of_articles[i]
        list_of_articles[i]["category"] = cat
        article = Article(article_title=element["title"], article_content=element["content"], article_category=cat)
        article.save()
    json_object = json.dumps(list_of_articles)
    return HttpResponse(json_object)
def test_classify_by_randomforest(): stock_d = testdata() ti = TechnicalIndicators(stock_d) filename = 'test_N225_randomforest.pickle' clffile = os.path.join(os.path.dirname( os.path.abspath(__file__)), '..', 'clf', filename) if os.path.exists(clffile): os.remove(clffile) clf = Classifier(filename) ti.calc_ret_index() ret = ti.stock['ret_index'] train_X, train_y = clf.train(ret, classifier="Random Forest") eq_(filename, os.path.basename(clf.filename)) r = round(train_X[-1][-1], 5) expected = 1.35486 eq_(r, expected) r = round(train_X[0][0], 5) expected = 1.08871 eq_(r, expected) expected = 14 r = len(train_X[0]) eq_(r, expected) expected = 120 r = len(train_X) eq_(r, expected) expected = [1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0] for r, e in zip(train_y, expected): eq_(r, e) expected = 1 test_y = clf.classify(ret) assert(test_y[0] == 0 or test_y[0] == 1) if os.path.exists(clffile): os.remove(clffile)
def main(mode='test'):
    cl = Classifier()
    cl.create_db('bunyk.db')
    if mode == 'test':
        test(cl)
    else:
        train(cl, 'http://bunyk.wordpress.com')
def setUp(self):
    text = u"Comment Google classe les pages Internet"
    c = Classifier(CleanTextUtil("french"))
    c.add_text(text)
    self.dictionary_db = c.dictionary_db
    self.vi = VectorItem("googl", "1")
def askfunc():
    options = \
        {
            "login": False,
            "username": "",
            "status": 0
        }
    error = None
    if session.has_key('username'):
        options["login"] = True
        options["username"] = session["username"]
    else:
        return render_template('errorpage.html', error=error)
    if request.method == 'POST':
        op = request.form['op']
        print ("enter the post")
        if op == "submit":
            newpic = request.files['photo']
            picAdded = True
            cur = mysql.connection.cursor()
            # NOTE: the literal in this query was scrubbed in the source ("******");
            # the reconstruction below simply quotes the session username.
            cur.execute("select userid from user where username = '" + session["username"] + "'")
            useridresult = []
            useridresult = cur.fetchall()
            userid = useridresult[0][0]
            userid = str(userid)
            currentuserid = userid
            currentuserid = str(currentuserid)
            pic_location = "static/pictures/" + currentuserid + "/" + newpic.filename
            newpic.save(pic_location)
            title = request.form['title']
            ori = request.form['ori']
            ori = str(ori)
            print ori
            if ori == "I dont know":
                # TODO: update with proper default value
                print ("classifier entered")
                cc = Classifier(pic_location)
                ori = cc.classify_text()
                ori = ori.title()
            tar = request.form['tar']
            # if tar == 'NA'
            des = request.form['des']
            cur = mysql.connection.cursor()
            cur.execute("insert into post (title, description, origin, target, pathtophoto, userid) values ('" + title + "', '" + des + "', '" + ori + "', '" + tar + "', '" + newpic.filename + "', " + currentuserid + ");")
            print ("insert into post (title, description, origin, target, pathtophoto, userid) values ('" + title + "', '" + des + "', '" + ori + "', '" + tar + "', '" + newpic.filename + "', " + currentuserid + ");")
            cur.execute("commit")
            return render_template('redirect.html', error=error)
    return render_template('ask.html', error=error, **options)
def test_combinations(args, graph=False): py = plotly.plotly(username='******', key='uzkqabvlzm', verbose=False) options = [100, 10, 0.9, 4, 0.05, 10000] features = ["skewness", "spectral_rolloff", "energy", "sv", "spread", "centroid", "zcr", "obsi", "kurtosis"] electronic_y = [] classical_y = [] categories = [] print '\t'.join([feature[:2] for feature in features] + ["meta", "acou", "regg", "elec", "class"]) for i in range(1, len(features) + 1): combinations = [list(comb) for comb in itertools.combinations(features, i)] for comb in combinations: comb_name = ', '.join(comb) classifier = Classifier(args['data'], discrete_intervals=options[0], size_rule_generation=options[1], filter_list=comb) top_fitness = classifier.train(req_min_fitness=options[2], gen_select=options[3], mutation_prob=options[4], limit_generations=options[5]) for feature in features: if feature in comb: sys.stdout.write("X\t") else: sys.stdout.write("\t") sys.stdout.write(str(top_fitness['metal']["fitness"])[:4] + "\t") sys.stdout.write(str(top_fitness['acoustic']["fitness"])[:4] + "\t") sys.stdout.write(str(top_fitness['reggae']["fitness"])[:4] + "\t") sys.stdout.write(str(top_fitness['electronic']["fitness"])[:4] + "\t") sys.stdout.write(str(top_fitness['classical']["fitness"])[:4] + "\n") if graph: print "Training ended\nFinal fitness:", top_fitness electronic_y.append(top_fitness['metal']) classical_y.append(top_fitness['classical']) categories.append(comb_name) if len(categories) > 20: electronic = { "name": "Metal", "x": categories, "y": electronic_y, "type": "bar" } classical = { "name": "Classical", "x": categories, "y": classical_y, "type": "bar" } layout = { "barmode": "group", 'xaxis': {'type': 'combination'}, 'catagories': categories } response = py.plot([electronic, classical], layout=layout) print response['url'] electronic_y = [] classical_y = [] categories = []
def cl_button_clicked_cb(self, button):
    """Classify button callback

    :param button: signal came from this button
    """
    if not len(self.sel_files):
        return
    self.counter = -1
    for row in self.sel_files:
        Classifier.classify(self.all_files[row], MainWindow.SR, row,
                            self.update_classify_progress_cb)
def test_classify():
    proxy = ReviewsMongoProxy("tripadvisor_train")
    review = proxy.find_review_by_id(proxy.next_random_review_id())
    classifier = Classifier("../tripadvisor/aspect_nltk_nb.pkl")
    classifier.classify(review)
    print_review(review)
def classifier(search_query):
    cls = Classifier(' '.join(search_query.split('_')))
    classified_output = cls.classify()
    if classified_output != None and len(classified_output) > 0:
        with open("output/" + search_query + ".json", "w") as out:
            out.write(json.dumps(classified_output))
        return json.dumps({"query": search_query, "status": "Success"})
    else:
        return json.dumps({"query": search_query, "status": "Failed"})
def main(c = "decision_tree", option = "IG", dataset = "iris", ratio = 0.8): classifier_types = {0: "decision_tree", 1: "naive_bayes", 2: "neural_net"} options = {0:["IG", "IGR"], 1:["normal"], 2:["shallow", "medium"]} ratio = float(ratio) if dataset == "monks": (training, test) = load_data.load_monks(ratio) elif dataset == "congress": (training, test) = load_data.load_congress_data(ratio) elif dataset == "iris": (training, test) = load_data.load_iris(ratio) else: print "Error: Cannot find dataset name." return print "Training... Please hold." # classifier_types = {0: "decision_tree", 2: "neural_net"} # options = {0:["IG", "IGR"], 2:["shallow", "medium"]} # (training, test) = load_data.load_iris(0.8) # nn_classifier = Classifier(classifier_type="neural_net", option = "medium") # nn_classifier.train(training) # nn_classifier.test(test) # print test # (training, test) = load_data.load_congress_data(0.8) # print test # (training, test) = load_data.load_monks(1) # print test # (training, test) = load_data.load_iris(0.8) # print training # "option = IG/IGR" # dt_classifier = Classifier(classifier_type="decision_tree", weights=[], option="IG") # dt_classifier.train(training) # dt_classifier.test(test) # for i, c in classifier_types.iteritems(): # for option in options[i]: print " " print "=================================================================" print "Dataset = ", dataset print "Classifier = ", c print "Option = ", option classifier = Classifier(classifier_type=c, weights = [], option = option) classifier.train(training) classifier.test(test) print "=================================================================" print " " # option value could be either shallow(3 layers) or medium(5) # nn_classifier = Classifier(classifier_type="neural_net", option = "medium") # nn_classifier.train(training) # nn_classifier.test(test) return
def test_performance(args, num_runs):
    # Features:
    features = ["skewness", "spectral_rolloff", "energy", "sv", "spread", "centroid", "zcr", "obsi", "kurtosis"]
    option = [100, 10, 0.9, 2, 0.05, 1000, selection.ROULETTE_WHEEL_SELECTION]
    for i in range(1, len(features) + 1):
        print "Num of features:", i
        for num_run in range(num_runs):
            classifier = Classifier(args['data'], discrete_intervals=option[0], size_rule_generation=option[1],
                                    filter_list=features[:i], log_results=False)
            start = time.clock()
            classifier.train(req_min_fitness=option[2], gen_select=option[3], mutation_prob=option[4],
                             limit_generations=option[5], selection_type=option[6])
            duration = (time.clock() - start) * 1000
            print num_run, "\t", duration
def main():
    me = Classifier()
    feature_counter = Counter()
    feature_set = pickle.load(open('undersampled_emoticon.pkl', 'rb'))
    feature_list = chain.from_iterable([word_tokenize(process_tweet(tweet)) for tweet, sentiment in feature_set])
    for feat in feature_list:
        feature_counter[feat] += 1
    me.feature_list = [feat for feat, count in feature_counter.most_common(1000)]
    ts = [(me.extract_features(tweet), label) for tweet, label in feature_set]
    print 'training Maxent, algorithm CG'
    me.classifier = MaxentClassifier.train(ts)
    return me
def makeClassifier():
    jiraGitMapper = Mapper()
    # Create a mapping of Jira tickets to git commits
    ticketsToCommits = jiraGitMapper.mapCommitsToTickets(gitData, jiraData, "SONAR-")
    # Take the git commits and associate them with java class names
    ticketsAndCommitsToClasses = jiraGitMapper.mapCommitsToClasses(ticketsToCommits)
    ticketsToClasses = ticketsAndCommitsToClasses[0]
    commitsToClasses = ticketsAndCommitsToClasses[1]

    classifier = Classifier()
    results = classifier.classifyClasses(ticketsToClasses)
    # results = classifier.randomClassifyClasses(ticketsToClasses)
    print("Precision: %.3f, Recall: %.3f, Accuracy: %.3f, f1 score: %.3f, hamming loss: %.3f"
          % (results[0], results[1], results[2], results[3], results[4]))
def get_predictions(sample_x):
    c = Classifier()
    predictions = []
    try:
        for x in sample_x:
            row = x
            labels = get_labels(x)
            p = c.get_prediction(row, labels)
            p = 1 if p else 0
            predictions.append(p)
    except Exception as e:
        show_exception(e)
        return []
    return predictions
def run(self):
    global worksqueue, spectImg
    classifier = Classifier()
    while True:
        sample = worksqueue.get()
        worksqueue.task_done()
        result = classifier.classify(sample)
        print "\nPrediction: %s\n" % result
        spectImg = writeMFCC(sample, RATE)
def main():
    args = parser.parse_args()
    data_json = read_dataset(args.data)
    processor = TextProcessor()
    classifier = Classifier(processor)
    classifier.train(data_json)
    serialized_classifier = classifier.dump()
    ensure_directory(args.output)
    with open(args.output, 'w') as f:
        f.write(serialized_classifier)
        f.write(os.linesep)
def test(args): test_performance(args, 5) return #return options = [ [100, 10, 0.9, 4, 0.05, 10000, selection.ROULETTE_WHEEL_SELECTION], # discrete_intervals, size_rule_generation, req_min_fitness, gen_select, limit_generations [1000, 10, 0.9, 4, 0.05, 10000, selection.ROULETTE_WHEEL_SELECTION], [100, 20, 0.9, 4, 0.05, 10000, selection.ROULETTE_WHEEL_SELECTION], [100, 5, 0.9, 2, 0.05, 10000, selection.ROULETTE_WHEEL_SELECTION], [100, 10, 0.9, 2, 0.05, 10000, selection.ROULETTE_WHEEL_SELECTION], [100, 10, 0.9, 6, 0.05, 10000, selection.ROULETTE_WHEEL_SELECTION], [200, 50, 0.9, 10, 0.05, 10000, selection.ROULETTE_WHEEL_SELECTION], [300, 10, 0.9, 4, 0.1, 10000, selection.ROULETTE_WHEEL_SELECTION], [500, 15, 0.9, 2, 0.005, 10000, selection.ROULETTE_WHEEL_SELECTION], [50, 20, 0.9, 4, 0.1, 10000, selection.ROULETTE_WHEEL_SELECTION] ] #prueba Tamaño de población options = [ [100, 5, 0.9, 2, 0.05, 10000, selection.ROULETTE_WHEEL_SELECTION], [100, 10, 0.9, 2, 0.05, 10000, selection.ROULETTE_WHEEL_SELECTION], [100, 15, 0.9, 2, 0.05, 10000, selection.ROULETTE_WHEEL_SELECTION], [100, 20, 0.9, 2, 0.05, 10000, selection.ROULETTE_WHEEL_SELECTION], [100, 30, 0.9, 2, 0.05, 10000, selection.ROULETTE_WHEEL_SELECTION], [100, 50, 0.9, 2, 0.05, 10000, selection.ROULETTE_WHEEL_SELECTION] ] #prueba Proceso de seleccion options = [ [100, 10, 0.9, 2, 0.05, 10000, selection.ROULETTE_WHEEL_SELECTION], [100, 10, 0.9, 2, 0.05, 10000, selection.RANK_SELECTION], [100, 10, 0.9, 2, 0.05, 10000, selection.TOURNAMENT_SELECTION] ] options = [ [100, 10, 0.9, 2, 0.05, 10000, selection.ROULETTE_WHEEL_SELECTION], ] average_multiple_runs(30, options, args) test_combinations(args) for num, option in enumerate(options): print "Option num:", num, ", val:", option classifier = Classifier(args['data'], discrete_intervals=option[0], size_rule_generation=option[1], filter_list=["skewness", "spectral_rolloff", "energy", "sv", "spread", "centroid", "obsi", "kurtosis"], log_results=True) best_results = classifier.train(req_min_fitness=option[2], gen_select=option[3], mutation_prob=option[4], limit_generations=option[5]) print "Testing" classifier.test() # classifier.guess_genre([7.53659769442,1389.49121537,0.0166588959174,0.355062895642,1480.75635175,769.172547276,3.47303203307,69.8220939453]) print "Training ended\nFinal fitness:", best_results
class EmojiRecommender():
    def __init__(self, fname_model, fname_embed, fname_dataset):
        print >> sys.stderr, 'EmojiRecommender: [info] loading word index...'
        self.windexer = WordIndexer.load(fname_embed)

        print >> sys.stderr, 'EmojiRecommender: [info] loading model...'
        self.clf = Classifier()
        self.clf.load_model(fname_model)

        print >> sys.stderr, 'EmojiRecommender: [info] loading emojis...'
        ecode_split = cPickle.load(open(fname_dataset, 'r'))
        self.emojis = [emo for emo, split in ecode_split]
        self.ydim = len(self.emojis)

        print >> sys.stderr, 'EmojiRecommender: [info] initialization done'

    def preprocess(self, text):
        text = text.decode('utf8')
        seq = zhtokenizer.tokenize(text)
        idxs = self.windexer.seq2idx(seq)
        return idxs

    def predict_proba(self, text):
        idxs = self.preprocess(text)
        if len(idxs) == 0:
            return None
        else:
            return self.clf.predict_proba(idxs)

    def recommend(self, text, n=5):
        proba = self.predict_proba(text)
        if proba is None:
            eids = [i for i in range(n)]
            scores = [0. for i in range(n)]
        else:
            ranks = [(i, proba[i]) for i in range(self.ydim)]
            ranks = sorted(ranks, key=lambda k: -k[1])
            eids = [ranks[i][0] for i in range(n)]
            scores = [ranks[i][1] for i in range(n)]
        res = [{'emoji': self.emojis[eid], 'score': '%.2f' % (score)} for eid, score in zip(eids, scores)]
        return res
def runDev(self):
    print "Running in development mode"
    K = 5
    trainImages = util.loadTrainImages()[:100]
    testImages = util.loadTestImages()[:100]
    classifier = Classifier()

    print 'Training..........'
    classifier.train(trainImages, K)
    trainPredictions = classifier.test(trainImages)
    trainAccuracy = self.evaluate(trainPredictions, trainImages)

    print 'All done. Here is your summary:'
    self.reportAccuracy(trainAccuracy, 'Train Accuracy')
fscore_top100 = Queue.Queue() fscore_feat = Queue.Queue() fscore_nofeat = Queue.Queue() else: fscore_top100 = np.zeros(num_folds) fscore_feat = np.zeros(num_folds) fscore_nofeat = np.zeros(num_folds) for fold in range(num_folds): print "Training and testing fold " + str(fold + 1) + "..." # Split dataset into train and set based on current fold train_set, train_labels, test_set, test_labels = utils.split_set( full_set, labels, thresholds[fold], thresholds[fold + 1]) if args.t: t_feat = Thread(target=Classifier(clf(), True, False, args.t).learn_classifier, args=(train_set, train_labels, test_set, test_labels, fscore_feat)) t_nofeat = Thread(target=Classifier(clf(), False, False, args.t).learn_classifier, args=(train_set, train_labels, test_set, test_labels, fscore_nofeat)) t_100 = Thread(target=Classifier(clf(), True, True, args.t).learn_classifier, args=(train_set, train_labels, test_set, test_labels, fscore_top100)) t_feat.start() t_nofeat.start() t_100.start() t_feat.join() t_nofeat.join()
def test_with_image(img_path):
    VGG_Face = Vgg_face_dag()
    VGG_Face = vgg_face_dag(VGG_Face, "src/models/vgg_face_dag.pth")
    thicc = 2
    score = 0  # To evaluate the state of the driver (drowsy or not)
    frame_count = 0
    frames = []
    path = os.getcwd()
    font = cv2.FONT_HERSHEY_COMPLEX_SMALL

    img = cv2.imread(img_path)
    height, width = img.shape[:2]

    classifier = Classifier(img)
    left_eye_pred = classifier.left_eye()
    right_eye_pred = classifier.right_eye()
    frames.append(img)
    drunk_pred = classifier.drunk_pred(frames, VGG_Face)

    if drunk_pred == 1:
        cv2.putText(img, "Drunk", (10, 20), font, 1, (255, 255, 255), 1, cv2.LINE_AA)
    else:
        cv2.putText(img, "Sober", (10, 20), font, 1, (255, 255, 255), 1, cv2.LINE_AA)

    if left_eye_pred == 0 and right_eye_pred == 0:
        score += 1
        cv2.putText(img, "Asleep", (10, height - 20), font, 1, (255, 255, 255), 1, cv2.LINE_AA)
    else:
        score = -1
        cv2.putText(img, "Awake", (10, height - 20), font, 1, (255, 255, 255), 1, cv2.LINE_AA)
    if score < 0:
        score = 0
    cv2.putText(img, "Score: " + str(score), (100, height - 20), font, 1, (255, 255, 255), 1, cv2.LINE_AA)

    if score > 8:
        # A score above the threshold means the driver has had his/her eyes
        # closed for too long: save the frame and (optionally) play the alarm
        cv2.imwrite(os.path.join(path, str(datetime.now()) + '.jpg'), img)
        # playsound()  # Play sound
        if thicc < 16:
            thicc += 2
        else:
            thicc -= 2
            if thicc < 2:
                thicc = 2
        # draw a red warning border around the frame
        cv2.rectangle(img, (0, 0), (width, height), (0, 0, 255), thicc)

    cv2.imshow('frame', img)
    k = cv2.waitKey(0)
    if k == 27:
        cv2.destroyAllWindows()
import os
import time

from flask import Flask, request, redirect, url_for

from classifier import Classifier

UPLOAD_FOLDER = 'uploads/'

app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

cf = Classifier()


@app.route('/', methods=['GET', 'POST'])
def upload_file():
    if request.method == 'POST':
        file = request.files['file']
        filepath = os.path.join(app.config['UPLOAD_FOLDER'], file.filename)
        file.save(filepath)
        print(filepath)
        res = cf.classify(filepath)
        print(res)
        if res is not None:
            return res
        else:
            return 'nope'
    return '''
    <!doctype html>
    <title>Upload</title>
    <h1>Upload image</h1>
    <form method=post enctype=multipart/form-data>
from classifier import Classifier
import numpy
import pandas as pd

# prepare data
data = numpy.load("data.npz")
train = data['train']
test = data['test']
labels = data['labels']

# create classifier
clf = Classifier()
clf.TreeClassifier()
clf.load_data(training=train, labels=labels, test=test)
results = clf.predict()

df = pd.read_csv("pair&average.csv", sep='\t')


def TF(x):
    if x == 0:
        return False
    else:
        return True


results = [TF(x) for x in results]
TF = pd.Series(results)
df['Need_normalize'] = TF
newdf = df[df['Need_normalize'] == True]
def main(displayHistory=True): #Window for past frames framesDiffHistory = [(getBlankFrameDiff(), getBlankFrameDiff()) for i in range(framesInHistory)] lastEyes = None #Load model classifier classifier = Classifier() #Start thread to make predictions classifier.startPredictions() #Initialize webcam vs = WebcamVideoStream(src=0).start() #For FPS computation t0 = -1 #Face/eyes detector detector = Detector() print "Starting eye recognition..." while True: #Compute FPS dt = time.time() - t0 fps = 1 / dt t0 = time.time() #Limit FPS with wait waitMs = 5 key = cv2.waitKey(waitMs) & 0xFF #Get image from webcam, convert to grayscale and resize fullFrame = vs.read() fullFrame = cv2.cvtColor(fullFrame, cv2.COLOR_BGR2GRAY) frame = imutils.resize(fullFrame, width=300) #Find face faceBB = detector.getFace(frame) if faceBB is None: #Invalidate eyes bounding box as all will change lastEyes = None detector.resetEyesBB() continue #Get low resolution face coordinates x, y, w, h = faceBB face = frame[y:y + h, x:x + w] #Apply to high resolution frame xScale = fullFrame.shape[1] / frame.shape[1] yScale = fullFrame.shape[0] / frame.shape[0] x, y, w, h = x * xScale, y * yScale, w * xScale, h * yScale fullFace = fullFrame[y:y + h, x:x + w] #Find eyes on high resolution face eyes = detector.getEyes(fullFace) if eyes is None: #Reset last eyes lastEyes = None continue eye0, eye1 = eyes #Process (normalize, resize) eye0 = process(eye0) eye1 = process(eye1) #Reshape for dataset eye0 = np.reshape(eye0, [datasetImageSize, datasetImageSize, 1]) eye1 = np.reshape(eye1, [datasetImageSize, datasetImageSize, 1]) #We have a recent picture of the eyes if lastEyes is not None: #Load previous eyes eye0previous, eye1previous = lastEyes #Compute diffs diff0 = getDifferenceFrame(eye0, eye0previous) diff1 = getDifferenceFrame(eye1, eye1previous) #Display/debug displayDiff = False if displayDiff: displayCurrentDiff(eye0, eye1, eye0previous, eye1previous, stopFrame=False) #Crop beginning then add new to end framesDiffHistory = framesDiffHistory[1:] framesDiffHistory.append([diff0, diff1]) #Keep current as last frame lastEyes = [eye0, eye1] #Note: this is not time consuming if displayHistory: displayHistoryDiffs(framesDiffHistory, fps) #Extract each eyes X0, X1 = zip(*framesDiffHistory) #Reshape as a tensor (NbExamples,SerieLength,Width,Height,Channels) X0 = np.reshape(X0, [ -1, len(framesDiffHistory), datasetImageSize, datasetImageSize, 1 ]) X1 = np.reshape(X1, [ -1, len(framesDiffHistory), datasetImageSize, datasetImageSize, 1 ]) #Save history to Classifier classifier.X0 = X0 classifier.X1 = X1
class Spider(object): def __init__(self): self.verifyCodeUrl = "http://jwgl.buct.edu.cn/CheckCode.aspx" #验证码获取地址 self.jwglLoginUrl = "http://jwgl.buct.edu.cn/default2.aspx" #教务网登录地址 self.getGradeUrl = "http://jwgl.buct.edu.cn/xscjcx.aspx" #成绩获取地址 self.getScheduleUrl = "http://jwgl.buct.edu.cn/xskbcx.aspx" #课程表获取地址 self.postClassUrl = "http://jwgl.buct.edu.cn/xsxk.aspx" #选课地址 self.studentID = #学号 self.username = #姓名 self.jwglPassword = #教务网密码 self.major = '0202高分子材料与工程' self.session = requests.Session() #实例化 session 对象 self.response = self.session.send(self.prepareJwglFirst(), timeout=5) # GET 方法获取登录网站的 '__VIEWSTATE' # 实例化验证码识别器对象 from classifier import Classifier self.classifier = Classifier() self.classifier.loadTrainingMat() self.remainList = [0, 1, 3, 4, 6, 7, 8, 10, 11, 12, 14] def formatHeaders(self, referer=None): """ 生成请求的 headers,referer 参数的默认值为 None 若 referer 为 None,则 headers 不包括 referer 参数 """ headers = { 'Host': 'jwgl.buct.edu.cn', 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language': 'en-US,en;q=0.5', 'Accept-Encoding': 'gzip, deflate, br', 'Connection': 'keep-alive', 'Upgrade-Insecure-Request': '1', } if referer: headers['Referer'] = referer return headers def getVIEWSTATE(self): """ 正则获取登录页面的 "__VIEWSTATE" """ import re return re.findall('<.*name="__VIEWSTATE".*value="(.*)?".*/>', self.response.text)[0] def prepareJwglFirst(self): headers = self.formatHeaders() req = Request('GET', self.jwglLoginUrl, headers=headers) return self.session.prepare_request(req) def prepareJwglLogin(self): """ 实例化登录 jwgl 需要的 request """ postdata = { '__VIEWSTATE': self.getVIEWSTATE(), #此参数非常重要,通过函数从当前网页源代码获取 'txtUserName': self.studentID, 'TextBox2': self.jwglPassword, 'txtSecretCode': self.verCode, 'RadioButtonList1': '学生', 'Button1': '', 'lbLanguage': '', 'hidPdrs': '', 'hidsc': '', } headers = self.formatHeaders(self.jwglLoginUrl) req = Request('POST', self.jwglLoginUrl, headers=headers, data=postdata) return self.session.prepare_request(req) def prepareGetGrade(self): headers = self.formatHeaders(self.response.url) params = { 'xh': self.studentID, 'xm': self.username, 'gnmkdm': 'N121605', } req = Request('GET', self.getGradeUrl, headers=headers, params=params) return self.session.prepare_request(req) def preparePastGrade(self): headers = self.formatHeaders(self.response.url) params = { 'xh': self.studentID, 'xm': self.username, 'gnmkdm': 'N121605', } postdata = { '__EVENTTARGET': '', '__EVENTARGUMENT': '', '__VIEWSTATE': self.getVIEWSTATE(), #此参数非常重要,通过函数从当前网页源代码获取 'hidLanguage': '', 'ddlXN': '', 'ddlXQ': '', 'ddl_kcxz': '', 'btn_zcj': '历年成绩', } req = Request('POST', self.getGradeUrl, headers=headers, params=params, data=postdata) return self.session.prepare_request(req) def prepareSchedule(self): headers = self.formatHeaders(self.response.url) params = { 'xh': self.studentID, 'xm': self.username, 'gnmkdm': 'N121603', } req = Request('GET', self.getScheduleUrl, headers=headers, params=params) return self.session.prepare_request(req) def preparePastSchedule(self, xn_, xq_): headers = self.formatHeaders(self.response.url) params = { 'xh': self.studentID, 'xm': self.username, 'gnmkdm': 'N121603', } postdata = { '__EVENTTARGET': 'xnd', '__EVENTARGUMENT': '', '__VIEWSTATE': self.getVIEWSTATE(), #此参数非常重要,通过函数从当前网页源代码获取 'xnd': xn_, 'xqd': xq_, } req = Request('POST', self.getScheduleUrl, headers=headers, params=params, data=postdata) return self.session.prepare_request(req) 
def prepareClass(self): headers = self.formatHeaders(self.response.url) params = { 'xh': self.studentID, 'xm': self.username, 'gnmkdm': 'N121101', } req = Request('GET', self.postClassUrl, headers=headers, params=params) return self.session.prepare_request(req) def prepareGetClass(self): headers = self.formatHeaders(self.response.url) params = { 'xh': self.studentID, 'xm': self.username, 'gnmkdm': 'N121101', } postdata = { '__EVENTTARGET': '', '__EVENTARGUMENT': '', '__VIEWSTATE': self.getVIEWSTATE(), #此参数非常重要,通过函数从当前网页源代码获取 'DrDl_Nj': self.studentID[:4], 'zymc': self.major + '主修专业||' + self.studentID[:4], 'xx': '', 'Button5': '本专业选课' } req = Request('POST', self.postClassUrl, headers=headers, params=params, data=postdata) return self.session.prepare_request(req) def jwglLogin(self, tryNum=10): """ 教务网登录函数 tryNum --> 尝试登录的最大次数,防止因递归深度过大导致溢出 """ import re tryNum -= 1 if tryNum < 0: print('\n*** stack overflow! exiting...') exit(0) codeImg = self.session.get(self.verifyCodeUrl, timeout=5) #获取验证码图片 with open('check.gif', 'wb') as fr: #保存验证码图片 for chunk in codeImg: fr.write(chunk) self.verCode = self.classifier.recognizer("check.gif") #识别验证码 try: self.response = self.session.send(self.prepareJwglLogin(), timeout=5) if re.search(self.studentID, self.response.url): #若 response.url 中匹配到学号,则认为登录成功 print("login successfully!") print(self.response.url) else: raise VerifyError("Wrong Verification code!") except VerifyError as e: print(e) print("retry...") self.jwglLogin(tryNum) #若登录不成功则递归调用自身 def getPastGrade(self): """ 获取历年成绩 """ self.response = self.session.send(self.prepareGetGrade(), timeout=5) self.response = self.session.send(self.preparePastGrade(), timeout=5) gradeMat = self.formatTable(self.response.text) gradeMat = [[row[i] for i in range(len(row)) if i in self.remainList] for row in gradeMat] self.outputTable(gradeMat, outputPath='grade.md') def getPastSchedule(self, xn_ ,xq_): self.response = self.session.send(self.prepareSchedule(), timeout=5) self.response = self.session.send(self.preparePastSchedule(xn_, xq_), timeout=5) scheduleMat = self.formatTable(self.response.text) with open('schedule.md', 'w') as fr: fr.write(str(scheduleMat)) #self.outputTable(scheduleMat, outputPath='schedule.md') def getClassList(self): self.response = self.session.send(self.prepareClass(), timeout=5) self.response = self.session.send(self.prepareGetClass(), timeout=5) def outputTable(self, tableMat, outputPath): """ 将成绩输出成 md 格式 """ tableMat.insert(1, [':------' for i in range(len(tableMat[0]))]) with open(outputPath, 'w') as fr: for row in tableMat: fr.write('|') for each in row: fr.write(each) fr.write('|') fr.write('\n') def formatTable(self, tableBody): """ 将抓取到的成绩解析成列表 """ from bs4 import BeautifulSoup import re soup = BeautifulSoup(tableBody, 'html.parser') return soup.br.table tableRow = soup.br.table.find_all('tr') tableMat = [i.find_all('td') for i in tableRow] return [[each.get_text().strip() for each in row] for row in tableMat] def clean(self): """ 爬取结束关闭会话 """ self.session.close()
from flask_socketio import SocketIO
import socket
from PIL import Image
import numpy as np
import struct
import sys
import time
import log
import os

from classifier import Classifier
from datastore import DataStore

classifier = Classifier(16, 16, 3)
app = Flask(__name__, static_url_path='')
socketio = SocketIO(app)
ds = DataStore('/var/pood/ds')

last_frame_time = 0
frames_received = 100
positives_last_frame = 0
force_training = False
collecting_negs = False


def has_cli_arg(arg_str):
    return arg_str in sys.argv


def classify_req(sock):
    global last_frame_time, frames_received, positives_last_frame, collecting_negs
from itertools import count

import torch
from torchvision import utils
import random
import glob
from shutil import copyfile

from mask_loader import load_image
from classifier import Classifier

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

CLASSIFIER_FILENAME = 'trained_models/classifier.to'

classifier = Classifier()
classifier.cuda()
classifier.load_state_dict(torch.load(CLASSIFIER_FILENAME))
classifier.eval()

file_names = glob.glob('data/raw/**.jpg', recursive=True)

while True:
    file_name = random.choice(file_names)
    hash = file_name.split('/')[-1][:-4]

    image = load_image(file_name).to(device)
    image = classifier.apply(image)
    if image is None:
        continue
# game stuff
from game_logic import GameLogic
from levels import LevelMic
from path_collector import PathCollector

# yaml config file
cfg = yaml.safe_load(open("../config.yaml"))

# init path collector
path_coll = PathCollector(cfg, root_path='.')

# --
# mic

# create classifier
classifier = Classifier(path_coll=path_coll, verbose=True)

# create mic instance
mic = Mic(classifier=classifier, feature_params=cfg['feature_params'], mic_params=cfg['mic_params'], is_audio_record=True)

# --
# game setup

# init pygame
pygame.init()

# init display
screen = pygame.display.set_mode(cfg['game']['screen_size'])
def train(): if args.dataset == 'baidu_VH': dataset = baidu_VH(PROJECT_METAROOT) elif args.dataset == 'summe': pass #dataset= else: raise ValueError('No such dataset') log.l.info(dataset.print_info()) train_data = AsyncReader(dataset, root_path=BAIDU_VH_ROOT, mode='train', modality=args.modality) train_data.set_params({ 'limitedfiles': None, 'sample_rate': 100, 'save_path': 'tmp_results/train_{}_sampled.pkl'.format(args.modality) }) X_train, Y_train = train_data.read_data(k=args.thread) val_data = AsyncReader(dataset, root_path=BAIDU_VH_ROOT, mode='val', modality=args.modality) val_data.set_params({ 'limitedfiles': None, 'sample_rate': 1, 'save_path': 'tmp_results/val_{}_sampled.pkl'.format(args.modality) }) X_val, Y_val = val_data.read_data(k=args.thread) model = Classifier(model_name=args.model_name, if_grid_search=args.if_grid_search, model_kernel=args.model_kernel) if args.if_grid_search: model.set_grid_search_params(grid_search_params[args.model_name]) X_train_grid_search, Y_train_grid_search = Sample_data( X_train, Y_train, args.grid_search_sample_rate) model.grid_search(X_train_grid_search, Y_train_grid_search) model.fit(X_train, Y_train) X_val_metric, Y_val_metric = Sample_data(X_val, Y_val, 0.1) predict_val = model.predict(X_val_metric) metrics = get_metrics(predict_val, Y_val_metric, metrics=METRICS) # print metrics log.l.info('the metrics of {} is :{}'.format(METRICS, metrics)) del X_train, Y_train #,X_train_grid_search,Y_train_grid_search,X_val_metric,Y_val_metric if args.create_curves: # for test set: val_curves_dic = dict() for k, v in val_data.data_dic.items(): val_curves_dic[k] = model.predict(v) test_data = AsyncReader(dataset, root_path=BAIDU_VH_ROOT, mode='test', modality=args.modality) test_data.set_params({ 'limitedfiles': None, 'sample_rate': 1, 'save_path': 'tmp_results/test_{}_sampled.pkl'.format(args.modality) }) _, _ = test_data.read_data(k=args.thread) test_curves_dic = dict() for k, v in test_data.data_dic.items(): test_curves_dic[k] = model.predict(v) return_info = {'val': val_curves_dic, 'test': test_curves_dic} if args.save_curves: joblib.dump( return_info, 'tmp_results/val_test_{}_curves.pkl'.format(args.modality)) return return_info return None
# Library import
import uvicorn
from fastapi import FastAPI, HTTPException, Request
from fastapi.templating import Jinja2Templates
from fastapi.responses import HTMLResponse

from classifier import Classifier
from helper import SentimentRequest, SentimentResponse

# Create APP instance of FastAPI
app = FastAPI()
model = Classifier()
templates = Jinja2Templates(directory="templates")


# Index route. Default: http://127.0.0.1:8000
@app.get("/", response_class=HTMLResponse)
async def read_item(request: Request):
    context = {
        "request": request,
        'title': "Form Input for News Classifier"
    }
    return templates.TemplateResponse("index.html", context=context)


@app.post('/predict/', response_model=SentimentResponse, status_code=200)
async def predict_text(request: SentimentRequest):
    if not model:
        raise HTTPException(status_code=404, detail="Model not found.")
    pred = model.process(request.text)
    return SentimentResponse(text=request.text, prediction=pred)
from classifier import Classifier
import time
from flask import Flask, render_template, request

app = Flask(__name__)

print("Load classifier")
start_time = time.time()
classifier = Classifier()
print("Classifier is successfully loaded")
print(time.time() - start_time, "seconds")


@app.route("/", methods=["POST", "GET"])
def index_page(text="", prediction_message=""):
    if request.method == "POST":
        text = request.form["text"]
        prediction_message = classifier.get_result_message(text)
    return render_template('simple_page.html', text=text, prediction_message=prediction_message)


if __name__ == "__main__":
    app.run(host='127.0.0.1', port=8080, debug=True)
# Chunks should never include caption data from multiple videos; cut off before 500 if at the end of the caption, then start again for the next caption.
# Idea: after question answering, compare the comment's video-ID to the video-ID of the caption chunk which answered the question.
#       This will give us an idea of where the answers are coming from (i.e. is it always from the associated video or not).

# Change these to get best results with a static response to positive and negative comments.
positive_threshold = 0.75
negative_threshold = -0.75

####################################
#                                  #
#           Object Calls           #
#                                  #
####################################

classifier = Classifier()  # Question classifier class.
YTObj = CommentCollection(API_SERVICE_NAME, API_VERSION, DEVELOPER_KEY)  # youtube access object

####################################
#                                  #
#           Main Code Body         #
#                                  #
####################################

# dataframe for comments and captions with format
"""
---------------
|              | 'commentList' | 'captionList' |
| VIDEO_IDS[0] |   list(...)   |   list(...)   |
...
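# Sketch (an assumption for illustration, not code from the project above) of the
# chunking rule described in the comments: a chunk never mixes captions from
# different videos, and it is cut off before the word budget is exceeded, always
# at a caption boundary. The helper name and the (video_id, caption_text) input
# shape are hypothetical.
def chunk_captions(captions, max_words=500):
    chunks = []                        # list of (video_id, chunk_text)
    current_video, current_words = None, []
    for video_id, caption in captions:
        words = caption.split()
        boundary = (video_id != current_video or
                    len(current_words) + len(words) > max_words)
        if boundary and current_words:
            chunks.append((current_video, ' '.join(current_words)))
            current_words = []
        current_video = video_id
        current_words.extend(words)
    if current_words:
        chunks.append((current_video, ' '.join(current_words)))
    return chunks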
""" os.environ['PYTHONHASHSEED'] = '0' np.random.seed(17) rn.seed(12345) if __name__ == "__main__": set_reproductible() datadir = "../data/" trainfile = datadir + "traindata.csv" devfile = datadir + "devdata.csv" testfile = None # Basic checking start_time = time.perf_counter() classifier = Classifier() print("\n") # Training print("1. Training the classifier...\n") classifier.train(trainfile) # Evaluation on the dev dataset print("\n2. Evaluation on the dev dataset...\n") slabels = classifier.predict(devfile) glabels = load_label_output(devfile) eval_list(glabels, slabels) if testfile is not None: # Evaluation on the test data print("\n3. Evaluation on the test dataset...\n") slabels = classifier.predict(testfile) glabels = load_label_output(testfile) eval_list(glabels, slabels)
from flask import Flask, request
import sys
sys.path.append('./scripts/classifier')
sys.path.append('./scripts/server/sql')
from classifier import Classifier
import utils, instructor, course, program_outcomes, learning_objectives, lab_schedule, \
    assignment_schedule, project_schedule, mid_term_schedule, final_exam_schedule, \
    course_grading

txt_clf = Classifier()
app = Flask(__name__)


# http://localhost:5000/classify?text=who is doing it
@app.route("/classify")
def classify():
    text = request.args.get('text')
    text = text.lower()
    text = utils.map_words_to_digits_in_text(text)
    question_type = txt_clf.classify(text)
    return _return_response_for_question(text, question_type)


def _return_response_for_question(text, label):
    if label == 'instructor':
        return instructor.get_instructor_details(text)
    elif label == 'course_name':
        return course.get_course_details(text)
    elif label == 'course_learning_objectives':
        return learning_objectives.get_learning_objectives(text)
    elif label == 'program_outcome':
def main(is_interactive=True, k=64, des_option=constants.ORB_FEAT_OPTION, svm_kernel=cv2.SVM_LINEAR): if not is_interactive: experiment_start = time.time() # Check for the dataset of images if not os.path.exists(constants.DATASET_PATH): print("Dataset not found, please copy one.") return dataset = Dataset(constants.DATASET_PATH) dataset.generate_sets() # Check for the directory where stores generated files if not os.path.exists(constants.FILES_DIR_NAME): os.makedirs(constants.FILES_DIR_NAME) if is_interactive: des_option = input( "Enter [1] for using ORB features or [2] to use SIFT features.\n") k = input( "Enter the number of cluster centers you want for the codebook.\n") svm_option = input( "Enter [1] for using SVM kernel Linear or [2] to use RBF.\n") svm_kernel = cv2.SVM_LINEAR if svm_option == 1 else cv2.SVM_RBF des_name = constants.ORB_FEAT_NAME if des_option == constants.ORB_FEAT_OPTION else constants.SIFT_FEAT_NAME log = Log(k, des_name, svm_kernel) codebook_filename = filenames.codebook(k, des_name) if is_interactive: codebook_option = input( "Enter [1] for generating a new codebook or [2] to load one.\n") else: codebook_option = constants.GENERATE_OPTION if codebook_option == constants.GENERATE_OPTION: # Calculate all the training descriptors to generate the codebook start = time.time() des = descriptors.all_descriptors(dataset, dataset.get_train_set(), des_option) end = time.time() log.train_des_time(end - start) # Generates the codebook using K Means print("Generating a codebook using K-Means with k={0}".format(k)) start = time.time() codebook = descriptors.gen_codebook(dataset, des, k) end = time.time() log.codebook_time(end - start) # Stores the codebook in a file utils.save(codebook_filename, codebook) print("Codebook saved in {0}".format(codebook_filename)) else: # Load a codebook from a file print("Loading codebook ...") codebook = utils.load(codebook_filename) print("Codebook with shape = {0} loaded.".format(codebook.shape)) # Train and test the dataset classifier = Classifier(dataset, log) svm = classifier.train(svm_kernel, codebook, des_option=des_option, is_interactive=is_interactive) print("Training ready. Now beginning with testing") result, labels = classifier.test(codebook, svm, des_option=des_option, is_interactive=is_interactive) # Store the results from the test classes = dataset.get_classes() log.classes(classes) log.classes_counts(dataset.get_classes_counts()) result_filename = filenames.result(k, des_name, svm_kernel) test_count = len(dataset.get_test_set()[0]) result_matrix = np.reshape(result, (len(classes), test_count)) utils.save_csv(result_filename, result_matrix) # Create a confusion matrix confusion_matrix = np.zeros((len(classes), len(classes)), dtype=np.uint32) for i in range(len(result)): predicted_id = int(result[i]) real_id = int(labels[i]) confusion_matrix[real_id][predicted_id] += 1 print("Confusion Matrix =\n{0}".format(confusion_matrix)) log.confusion_matrix(confusion_matrix) log.save() print("Log saved on {0}.".format(filenames.log(k, des_name, svm_kernel))) if not is_interactive: experiment_end = time.time() elapsed_time = utils.humanize_time(experiment_end - experiment_start) print("Total time during the experiment was {0}".format(elapsed_time)) else: # Show a plot of the confusion matrix on interactive mode utils.show_conf_mat(confusion_matrix) raw_input("Press [Enter] to exit ...")
        wordCounter[word] += 1
    else:
        wordCounter[word] = 1

popularWords = sorted(wordCounter, key=wordCounter.get, reverse=True)
lexicon = popularWords[:4000]

# After learning the lexicon, OOV words are replaced by the UNK sign
trainingSet0 = unkWord(trainingSet0, lexicon)
trainingSet1 = unkWord(trainingSet1, lexicon)

# positive and negative tweets are passed to the training object
for word in trainingSet0:
    tweetTrainer.train(word, '0')
for word in trainingSet1:
    tweetTrainer.train(word, '1')

# a classifier instance
sentimentClassifier = Classifier(tweetTrainer.data, tokenizer.Tokenizer(stop_words=[], signs_to_remove=[]))

# storage of lexicon and sentiment classifier on disk
c = open('sentimentClassifier.pickle', 'wb')
l = open('lexicon.pickle', 'wb')
pickle.dump(sentimentClassifier, c)
pickle.dump(lexicon, l)
c.close()
l.close()
file.close()

# test section
# loading lexicon and sentimentClassifier for evaluation over random samples
# samples are chosen randomly and not within training samples
c = open('sentimentClassifier.pickle', 'rb')
l = open('lexicon.pickle', 'rb')
fileEval = open('test2.csv', 'r', encoding="Latin-1")  # .csv
"Test10(3Good3Bad).mp4" # "Detector/DLTest.mp4" # "Train1(1Good5Bad).mp4", # "Train2(1Good6Bad).mp4" # "Train3(1Good5Bad).mp4", # "Train4(1Good5Bad).mp4", ] for video in videoList: videoPath = "/home/eamonn/FYP/Videos/" + video gymObjects = { 'Gym_Plate': { 'Location': '', 'Frame': 0 }, 'FootWear': { 'Location': [], 'Frame': 0 } # 'Person': {'Location': [], # 'Frame': 0} } classifier = Classifier() classifier.createDecisionTreeClassifier() god = GymObjectDetector(gymObjects, videoPath) trackedObjects = god.getNormalisedObjectLocations() CSRTTracker = MultiTracker(gymObjects, videoPath, classifier) barbellPosition, footwearPosition = CSRTTracker.displayAndTrack()
class FitAndPredict: ''' Class contains function for the training and classification pipeline ''' def __init__(self): self.train_file = config.TRAIN_FILE self.test_file = config.TEST_FILE self.predicted_test_file = config.PREDICTED_TEST_FILE self.model_folder = config.MODELS_FOLDER self.target_map = config.TARGET_MAP self.map_sensors = config.MAP_SENSORS self.load_cell_theshold = config.LOAD_CELL_THRESHOLD self.weight_threshold = config.WEIGHT_THRESHOLD self.outliers_threshold = config.OUTLIERS_THRESHOLD self.feature_names = config.FEATURE_NAMES self.dev_map = config.DEV_MAP self.plank_dict = config.PLANK_DICT self.position_to_remove = config.POSITION_TO_REMOVE self.sensor_details_file = sensor_details_file self.norm_sensor_details_file = norm_sensor_details_file self.random_state = config.RANDOM_STATE self.min_samples_split = config.MIN_SAMPLES_SPLIT self.min_samples_leaf = config.MIN_SAMPLES_LEAF self.n_estimators = config.N_ESTIMATORS self.model_name = model_name self.target_column = config.TARGET_COLUMN self.preprocess = PreProcess(self.load_cell_theshold, self.weight_threshold, self.outliers_threshold, \ self.map_sensors, self.target_map, self.position_to_remove, \ self.sensor_details_file, self.norm_sensor_details_file, self.model_folder) self.fe = FeatureExtractor(self.plank_dict) def read_train_data(self): ''' Function to read the train data in the training pipeline ''' logging.debug(__name__ + ' : ' + ' Start read_train_data()') try: self.input_data = read_csv(self.train_file) logging.debug(__name__ + ' shape : ' + str(self.input_data.shape)) logging.debug(__name__ + ' : ' + ' End read_train_data()') except Exception as e: logging.error(__name__ + ' : ' + ' Input file not found ') logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass def check_train_data(self): ''' Function to check if all load cell columns and target column are present in the train data ''' logging.debug(__name__ + ' : ' + ' Start check_train_data()') try: train_columns = self.input_data.columns.values except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass # check if all load cell columns are present in the train data try: if (set(self.map_sensors.keys()).issubset(set(train_columns))) or \ (set(self.map_sensors.values()).issubset(set(train_columns))): pass else: print ("LOAD CELL COLUMNS NOT PRESENT IN TRAIN DATA") logging.debug(__name__ + ' : ' + ' LOAD CELL COLUMNS NOT PRESENT IN TRAIN DATA') logging.debug(__name__ + ' : ' + ' End read_train_data()') sys.exit() except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass # check if target column is present in the data try: if (set([self.target_column]).issubset(set(train_columns))): pass else: print ("TARGET COLUMN NOT PRESENT IN TRAIN DATA") logging.debug(__name__ + ' : ' + ' TARGET COLUMN NOT PRESENT IN TRAIN DATA') logging.debug(__name__ + ' : ' + ' End read_train_data()') sys.exit() except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass # check if all 5 positions are present in target column of train data try: if (set(self.input_data[self.target_column].unique()).issubset(set([1, 2, 3, 4, 5]))): pass else: print ("VALUES OTHER THAN PRESPECIFIED POSITION VALUES PRESENT IN TRAIN DATA") logging.debug(__name__ + ' : ' + ' VALUES OTHER THAN PRESPECIFIED POSITION VALUES PRESENT IN TRAIN DATA') logging.debug(__name__ + ' : ' + ' End read_train_data()') sys.exit() except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: 
pass logging.debug(__name__ + ' : ' + ' End check_train_data()') return def preprocess_train_data(self): ''' Function to preprocess the train data in the training pipeline ''' logging.debug(__name__ + ' : ' + ' Start preprocess_train_data()') try: logging.debug(__name__ + ' : ' + ' Start rename_columns_if_needed()') self.preprocessed_input_data = self.preprocess.rename_columns_if_needed(self.input_data) logging.debug(__name__ + ' : ' + ' End rename_columns_if_needed()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass try: logging.debug(__name__ + ' : ' + ' Start rem_missing_train()') self.preprocessed_input_data = self.preprocess.rem_missing_train(self.preprocessed_input_data) logging.debug(__name__ + ' : ' + ' End rem_missing_train()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass try: logging.debug(__name__ + ' : ' + ' Start rem_load_cell_threshold()') self.preprocessed_input_data = self.preprocess.rem_load_cell_threshold(self.preprocessed_input_data) logging.debug(__name__ + ' : ' + ' End rem_load_cell_threshold()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass try: logging.debug(__name__ + ' : ' + ' Start rem_less_weights()') self.preprocessed_input_data = self.preprocess.rem_less_weights(self.preprocessed_input_data) logging.debug(__name__ + ' : ' + ' End rem_less_weights()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass try: logging.debug(__name__ + ' : ' + ' Start rem_sitting()') self.preprocessed_input_data = self.preprocess.rem_sitting(self.preprocessed_input_data) logging.debug(__name__ + ' : ' + ' End rem_sitting()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass try: logging.debug(__name__ + ' : ' + ' Start normalize()') self.preprocessed_input_data = self.preprocess.normalize(self.preprocessed_input_data) logging.debug(__name__ + ' : ' + ' End normalize()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass try: logging.debug(__name__ + ' : ' + ' Start treat_outliers_train()') self.preprocessed_input_data = self.preprocess.treat_outliers_train(self.preprocessed_input_data) logging.debug(__name__ + ' : ' + ' End treat_outliers_train()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass logging.debug(__name__ + ' shape : ' + str(self.preprocessed_input_data.shape)) logging.debug(__name__ + ' : ' + ' End preprocess_train_data()') return def read_test_data(self): ''' Function to read the test data in the classification pipeline ''' logging.debug(__name__ + ' : ' + ' Start read_test_data()') try: self.test_data = read_csv(self.test_file, header = None) logging.debug(__name__ + ' shape : ' + str(self.test_data.shape)) logging.debug(__name__ + ' : ' + ' End read_test_data()') except Exception as e: logging.error(__name__ + ' : ' + ' Test file not found ') logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: return def check_test_data(self): ''' Function to check if all load cell columns are present in the test data ''' logging.debug(__name__ + ' : ' + ' Start check_test_data()') # check if all load cell columns are present in the test data #try: #test_columns = self.test_data.columns #except Exception as e: #logging.error(__name__ + ' : ' + ' Error: ' + str(e)) #finally: #pass try: #if (set(self.map_sensors.keys()).issubset(set(test_columns))) or 
(set(self.map_sensors.values()).issubset(set(test_columns))): test_columns = ['LC' + str(x) for x in range(1, 17)] if self.test_data.shape[1] == len(test_columns): self.test_data.columns = test_columns pass else: print ("TEST DATA DO NOT HAVE ALL LOAD CELLS DATA") logging.debug(__name__ + ' : ' + ' TEST DATA DO NOT HAVE ALL LOAD CELLS DATA') logging.debug(__name__ + ' : ' + ' End check_test_data()') sys.exit() except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass logging.debug(__name__ + ' : ' + ' End check_test_data()') return def preprocess_test_data(self): ''' Function to preprocess the test data in the classification pipeline ''' logging.debug(__name__ + ' : ' + ' Start preprocess_test_data()') try: self.preprocessed_test_data = self.test_data except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass #try: # logging.debug(__name__ + ' : ' + ' Start rename_columns_if_needed()') # self.preprocessed_test_data = self.preprocess.rename_columns_if_needed(self.test_data) # logging.debug(__name__ + ' : ' + ' End rename_columns_if_needed()') #except Exception as e: # logging.error(__name__ + ' : ' + ' Error: ' + str(e)) #finally: # pass try: logging.debug(__name__ + ' : ' + ' Start treat_missing_test()') self.preprocessed_test_data = self.preprocess.treat_missing_test(self.preprocessed_test_data) logging.debug(__name__ + ' : ' + ' End treat_missing_test()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass try: logging.debug(__name__ + ' : ' + ' Start normalize()') self.preprocessed_test_data = self.preprocess.normalize(self.preprocessed_test_data) logging.debug(__name__ + ' : ' + ' End normalize()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass try: logging.debug(__name__ + ' : ' + ' Start treat_outliers_test()') self.preprocessed_test_data = self.preprocess.treat_outliers_test(self.preprocessed_test_data) logging.debug(__name__ + ' : ' + ' End treat_outliers_test()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass logging.debug(__name__ + ' shape : ' + str(self.preprocessed_test_data.shape)) logging.debug(__name__ + ' : ' + ' End preprocess_test_data()') return def transform_train_data_into_features(self): ''' Function to create features from train data in the training pipeline ''' logging.debug(__name__ + ' : ' + ' Start transform_train_data_into_features()') try: # left_sensors_pct logging.debug(__name__ + ' : ' + ' Start left_percent()') self.preprocessed_input_data['left_sensors_pct'] = self.preprocessed_input_data.apply(lambda x : self.fe.left_percent(x), axis = 1) logging.debug(__name__ + ' : ' + ' End left_percent()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass try: # plank_1_std logging.debug(__name__ + ' : ' + ' Start plank_1_std_cal()') self.preprocessed_input_data['plank_1_std'] = self.preprocessed_input_data.apply(lambda x : self.fe.plank_1_std_cal(x), axis = 1) logging.debug(__name__ + ' : ' + ' End plank_1_std_cal()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass try: # plank_2_std logging.debug(__name__ + ' : ' + ' Start plank_2_std_cal()') self.preprocessed_input_data['plank_2_std'] = self.preprocessed_input_data.apply(lambda x : self.fe.plank_2_std_cal(x), axis = 1) logging.debug(__name__ + ' : ' + ' End plank_2_std_cal()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' 
+ str(e)) finally: pass try: # plank_3_std logging.debug(__name__ + ' : ' + ' Start plank_3_std_cal()') self.preprocessed_input_data['plank_3_std'] = self.preprocessed_input_data.apply(lambda x : self.fe.plank_3_std_cal(x), axis = 1) logging.debug(__name__ + ' : ' + ' End plank_3_std_cal()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass try: # plank_4_std logging.debug(__name__ + ' : ' + ' Start plank_4_std_cal()') self.preprocessed_input_data['plank_4_std'] = self.preprocessed_input_data.apply(lambda x : self.fe.plank_4_std_cal(x), axis = 1) logging.debug(__name__ + ' : ' + ' End plank_4_std_cal()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass try: # com_1_x logging.debug(__name__ + ' : ' + ' Start get_com_1_x()') self.preprocessed_input_data['plank_1_com_x'] = self.preprocessed_input_data.apply(lambda x: self.fe.get_com_1_x(x), axis = 1) logging.debug(__name__ + ' : ' + ' End get_com_1_x()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass try: # com_2_x logging.debug(__name__ + ' : ' + ' Start get_com_2_x()') self.preprocessed_input_data['plank_2_com_x'] = self.preprocessed_input_data.apply(lambda x: self.fe.get_com_2_x(x), axis = 1) logging.debug(__name__ + ' : ' + ' End get_com_2_x()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass try: # com_3_x logging.debug(__name__ + ' : ' + ' Start get_com_3_x()') self.preprocessed_input_data['plank_3_com_x'] = self.preprocessed_input_data.apply(lambda x: self.fe.get_com_3_x(x), axis = 1) logging.debug(__name__ + ' : ' + ' End get_com_3_x()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass try: # com_4_x logging.debug(__name__ + ' : ' + ' Start get_com_4_x()') self.preprocessed_input_data['plank_4_com_x'] = self.preprocessed_input_data.apply(lambda x: self.fe.get_com_4_x(x), axis = 1) logging.debug(__name__ + ' : ' + ' End get_com_4_x()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass try: # com_1_y logging.debug(__name__ + ' : ' + ' Start get_com_1_y()') self.preprocessed_input_data['plank_1_com_y'] = self.preprocessed_input_data.apply(lambda x: self.fe.get_com_1_y(x), axis = 1) logging.debug(__name__ + ' : ' + ' End get_com_1_y()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass try: # com_2_y logging.debug(__name__ + ' : ' + ' Start get_com_2_y()') self.preprocessed_input_data['plank_2_com_y'] = self.preprocessed_input_data.apply(lambda x: self.fe.get_com_2_y(x), axis = 1) logging.debug(__name__ + ' : ' + ' End get_com_2_y()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass try: # com_3_y logging.debug(__name__ + ' : ' + ' Start get_com_3_y()') self.preprocessed_input_data['plank_3_com_y'] = self.preprocessed_input_data.apply(lambda x: self.fe.get_com_3_y(x), axis = 1) logging.debug(__name__ + ' : ' + ' End get_com_3_y()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass try: # com_4_y logging.debug(__name__ + ' : ' + ' Start get_com_4_y()') self.preprocessed_input_data['plank_4_com_y'] = self.preprocessed_input_data.apply(lambda x: self.fe.get_com_4_y(x), axis = 1) logging.debug(__name__ + ' : ' + ' End get_com_4_y()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass try: # y_errors logging.debug(__name__ + ' : ' + ' 
Start get_errors_from_fitted_line()') self.preprocessed_input_data['y_errors'] = self.preprocessed_input_data.apply(lambda x: self.fe.get_errors_from_fitted_line(x), axis = 1) logging.debug(__name__ + ' : ' + ' End get_errors_from_fitted_line()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass try: # plank_3_deviation from fitted line through COMs of first two planks logging.debug(__name__ + ' : ' + ' Start get_deviation_plank_3()') self.preprocessed_input_data['plank_3_dev'] = self.preprocessed_input_data.apply(lambda x: self.fe.get_deviation_plank_3(x), axis = 1) logging.debug(__name__ + ' : ' + ' End get_deviation_plank_3()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass try: # plank_4_deviation from fitted line through COMs of first two planks logging.debug(__name__ + ' : ' + ' Start get_deviation_plank_4()') self.preprocessed_input_data['plank_4_dev'] = self.preprocessed_input_data.apply(lambda x: self.fe.get_deviation_plank_4(x), axis = 1) logging.debug(__name__ + ' : ' + ' End get_deviation_plank_4()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass try: # plank_3_dev_bucket logging.debug(__name__ + ' : ' + ' Start bucketize_plank_3_dev()') self.preprocessed_input_data['plank_3_dev_bucket'] = self.preprocessed_input_data['plank_3_dev'].apply(lambda x: self.fe.bucketize_plank_dev(x)) self.preprocessed_input_data['plank_3_dev_bucket'] = self.preprocessed_input_data['plank_3_dev_bucket'].apply(lambda x: self.dev_map[x]) logging.debug(__name__ + ' : ' + ' End bucketize_plank_3_dev()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass try: # plank_4_dev_bucket logging.debug(__name__ + ' : ' + ' Start bucketize_plank_4_dev()') self.preprocessed_input_data['plank_4_dev_bucket'] = self.preprocessed_input_data['plank_4_dev'].apply(lambda x: self.fe.bucketize_plank_dev(x)) self.preprocessed_input_data['plank_4_dev_bucket'] = self.preprocessed_input_data['plank_4_dev_bucket'].apply(lambda x: self.dev_map[x]) logging.debug(__name__ + ' : ' + ' End bucketize_plank_4_dev()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass try: # plank_4_deviation from fitted line through COMs of 2nd and 3rd planks logging.debug(__name__ + ' : ' + ' Start plank_4_wrt_3_2()') self.preprocessed_input_data['plank_4_wrt_3_2'] = self.preprocessed_input_data.apply(lambda x: self.fe.plank_4_wrt_3_2(x), axis = 1) logging.debug(__name__ + ' : ' + ' End plank_4_wrt_3_2()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass logging.debug(__name__ + ' shape : ' + str(self.preprocessed_input_data.shape)) logging.debug(__name__ + ' : ' + ' End transform_train_data_into_features()') return def transform_test_data_into_features(self): ''' Function to create features from test data in the classification pipeline ''' logging.debug(__name__ + ' : ' + ' Start transform_test_data_into_features()') try: # left_sensors_pct logging.debug(__name__ + ' : ' + ' Start left_percent()') self.preprocessed_test_data['left_sensors_pct'] = self.preprocessed_test_data.apply(lambda x : self.fe.left_percent(x), axis = 1) logging.debug(__name__ + ' : ' + ' End left_percent()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass try: # plank_1_std logging.debug(__name__ + ' : ' + ' Start plank_1_std_cal()') self.preprocessed_test_data['plank_1_std'] = 
self.preprocessed_test_data.apply(lambda x : self.fe.plank_1_std_cal(x), axis = 1) logging.debug(__name__ + ' : ' + ' End plank_1_std_cal()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass try: # plank_2_std logging.debug(__name__ + ' : ' + ' Start plank_2_std_cal()') self.preprocessed_test_data['plank_2_std'] = self.preprocessed_test_data.apply(lambda x : self.fe.plank_2_std_cal(x), axis = 1) logging.debug(__name__ + ' : ' + ' End plank_2_std_cal()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass try: # plank_3_std logging.debug(__name__ + ' : ' + ' Start plank_3_std_cal()') self.preprocessed_test_data['plank_3_std'] = self.preprocessed_test_data.apply(lambda x : self.fe.plank_3_std_cal(x), axis = 1) logging.debug(__name__ + ' : ' + ' End plank_3_std_cal()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass try: # plank_4_std logging.debug(__name__ + ' : ' + ' Start plank_4_std_cal()') self.preprocessed_test_data['plank_4_std'] = self.preprocessed_test_data.apply(lambda x : self.fe.plank_4_std_cal(x), axis = 1) logging.debug(__name__ + ' : ' + ' End plank_4_std_cal()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass try: # com_1_x logging.debug(__name__ + ' : ' + ' Start get_com_1_x()') self.preprocessed_test_data['plank_1_com_x'] = self.preprocessed_test_data.apply(lambda x: self.fe.get_com_1_x(x), axis = 1) logging.debug(__name__ + ' : ' + ' End get_com_1_x()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass try: # com_2_x logging.debug(__name__ + ' : ' + ' Start get_com_2_x()') self.preprocessed_test_data['plank_2_com_x'] = self.preprocessed_test_data.apply(lambda x: self.fe.get_com_2_x(x), axis = 1) logging.debug(__name__ + ' : ' + ' End get_com_2_x()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass try: # com_3_x logging.debug(__name__ + ' : ' + ' Start get_com_3_x()') self.preprocessed_test_data['plank_3_com_x'] = self.preprocessed_test_data.apply(lambda x: self.fe.get_com_3_x(x), axis = 1) logging.debug(__name__ + ' : ' + ' End get_com_3_x()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass try: # com_4_x logging.debug(__name__ + ' : ' + ' Start get_com_4_x()') self.preprocessed_test_data['plank_4_com_x'] = self.preprocessed_test_data.apply(lambda x: self.fe.get_com_4_x(x), axis = 1) logging.debug(__name__ + ' : ' + ' End get_com_4_x()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass try: # com_1_y logging.debug(__name__ + ' : ' + ' Start get_com_1_y()') self.preprocessed_test_data['plank_1_com_y'] = self.preprocessed_test_data.apply(lambda x: self.fe.get_com_1_y(x), axis = 1) logging.debug(__name__ + ' : ' + ' End get_com_1_y()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass try: # com_2_y logging.debug(__name__ + ' : ' + ' Start get_com_2_y()') self.preprocessed_test_data['plank_2_com_y'] = self.preprocessed_test_data.apply(lambda x: self.fe.get_com_2_y(x), axis = 1) logging.debug(__name__ + ' : ' + ' End get_com_2_y()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass try: # com_3_y logging.debug(__name__ + ' : ' + ' Start get_com_3_y()') self.preprocessed_test_data['plank_3_com_y'] = self.preprocessed_test_data.apply(lambda x: self.fe.get_com_3_y(x), 
axis = 1) logging.debug(__name__ + ' : ' + ' End get_com_3_y()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass try: # com_4_y logging.debug(__name__ + ' : ' + ' Start get_com_4_y()') self.preprocessed_test_data['plank_4_com_y'] = self.preprocessed_test_data.apply(lambda x: self.fe.get_com_4_y(x), axis = 1) logging.debug(__name__ + ' : ' + ' End get_com_4_y()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass try: # y_errors logging.debug(__name__ + ' : ' + ' Start get_errors_from_fitted_line()') self.preprocessed_test_data['y_errors'] = self.preprocessed_test_data.apply(lambda x: self.fe.get_errors_from_fitted_line(x), axis = 1) logging.debug(__name__ + ' : ' + ' End get_errors_from_fitted_line()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass try: # plank_3_deviation from fitted line through COMs of first two planks logging.debug(__name__ + ' : ' + ' Start get_deviation_plank_3()') self.preprocessed_test_data['plank_3_dev'] = self.preprocessed_test_data.apply(lambda x: self.fe.get_deviation_plank_3(x), axis = 1) logging.debug(__name__ + ' : ' + ' End get_deviation_plank_3()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass try: # plank_4_deviation from fitted line through COMs of first two planks logging.debug(__name__ + ' : ' + ' Start get_deviation_plank_4()') self.preprocessed_test_data['plank_4_dev'] = self.preprocessed_test_data.apply(lambda x: self.fe.get_deviation_plank_4(x), axis = 1) logging.debug(__name__ + ' : ' + ' End get_deviation_plank_4()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass try: # plank_3_dev_bucket logging.debug(__name__ + ' : ' + ' Start bucketize_plank_3_dev()') self.preprocessed_test_data['plank_3_dev_bucket'] = self.preprocessed_test_data['plank_3_dev'].apply(lambda x: self.fe.bucketize_plank_dev(x)) self.preprocessed_test_data['plank_3_dev_bucket'] = self.preprocessed_test_data['plank_3_dev_bucket'].apply(lambda x: self.dev_map[x]) logging.debug(__name__ + ' : ' + ' End bucketize_plank_3_dev()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass try: # plank_4_dev_bucket logging.debug(__name__ + ' : ' + ' Start bucketize_plank_4_dev()') self.preprocessed_test_data['plank_4_dev_bucket'] = self.preprocessed_test_data['plank_4_dev'].apply(lambda x: self.fe.bucketize_plank_dev(x)) self.preprocessed_test_data['plank_4_dev_bucket'] = self.preprocessed_test_data['plank_4_dev_bucket'].apply(lambda x: self.dev_map[x]) logging.debug(__name__ + ' : ' + ' End bucketize_plank_4_dev()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass try: # plank_4_deviation from fitted line through COMs of 2nd and 3rd planks logging.debug(__name__ + ' : ' + ' Start plank_4_wrt_3_2()') self.preprocessed_test_data['plank_4_wrt_3_2'] = self.preprocessed_test_data.apply(lambda x: self.fe.plank_4_wrt_3_2(x), axis = 1) logging.debug(__name__ + ' : ' + ' End plank_4_wrt_3_2()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: pass logging.debug(__name__ + ' shape : ' + str(self.preprocessed_test_data.shape)) logging.debug(__name__ + ' : ' + ' End transform_test_data_into_features()') return def train_model(self): ''' Function to train the model on train data for training pipeline ''' logging.debug(__name__ + ' : ' + ' Start train_model()') try: # train the 
model self.learner = Learner(self.n_estimators, self.min_samples_split, \ self.min_samples_leaf, self.random_state, \ self.model_folder, self.model_name) X_train = self.preprocessed_input_data[self.feature_names] Y_train = self.preprocessed_input_data[self.target_column] predictions, pred_prob = self.learner.train_model(X_train, Y_train) #print (np.round(accuracy_score(Y_train, predictions), 4) * 100) #print (np.round(log_loss(Y_train, pred_prob), 2)) logging.debug(__name__ + ' : ' + ' End train_model()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: return def classify(self): ''' Function to make classifications on test data for the classification pipeline ''' logging.debug(__name__ + ' : ' + ' Start classify()') try: # classify using the model self.classifier = Classifier(self.model_folder) X_test = self.preprocessed_test_data[self.feature_names] #Y_test = self.preprocessed_test_data[self.target_column] #print (X_test.shape, Y_test.shape) predictions, pred_prob = self.classifier.classify_model(X_test) # saving the test dataset with the predicted values #self.test_data[self.target_column] = predictions #self.test_data.to_csv(self.predicted_test_file, index = False) self.predicted_test_data = DataFrame({self.target_column:predictions}) self.predicted_test_data.to_csv(self.predicted_test_file, index = False, header = False) # printing the accuracy score #print (np.round(accuracy_score(Y_test, predictions), 4) * 100) #print (np.round(log_loss(Y_test, pred_prob), 2)) logging.debug(__name__ + ' : ' + ' End classify()') except Exception as e: logging.error(__name__ + ' : ' + ' Error: ' + str(e)) finally: return
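Every method of FitAndPredict wraps each step in the same try/except block that logs a start marker, an end marker and any exception. Below is a minimal sketch of a helper that factors that pattern out, using only the standard logging module; run_step and the example lambda are hypothetical names, not part of the original pipeline.

import logging

def run_step(step_name, func, *args, **kwargs):
    # Run one pipeline step, logging its start, end and any raised exception,
    # mirroring the try/except blocks used throughout FitAndPredict.
    logging.debug('%s : Start %s()', __name__, step_name)
    try:
        result = func(*args, **kwargs)
        logging.debug('%s : End %s()', __name__, step_name)
        return result
    except Exception as e:
        logging.error('%s : Error in %s(): %s', __name__, step_name, e)
        return None

# Hypothetical usage for one of the feature steps above:
# df['left_sensors_pct'] = run_step('left_percent',
#                                   lambda: df.apply(fe.left_percent, axis=1))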
class Recognizer: def __init__(self, agent_id): self.clf = Classifier( model_path='./models/model.augmented.pkl', vectorizer_path='./models/vectorizer.augmented.pkl', label_dict_path='./models/label_dict.augmented.pkl') self.q_detector = QuestionDetector(agent_id) self.tokenizer = MeCab.Tagger() def recognize(self, sentence): sentences = self.split_sentence(sentence) analyzed_sentences = [self.normalize(s) for s in sentences] sentences = [''.join([w.word for w in s]) for s in analyzed_sentences] intents = self.clf.predict(sentences) is_q = any(self.q_detector.detect(s) for s in sentences) results = [ self.get_detail(s, i) for s, i in zip(analyzed_sentences, intents) ] return results def get_detail(self, sentence, intent): if intent == 'DIVINE': agent_id = self.get_agent_id(''.join([w.word for w in sentence])) result = self.get_white_black(sentence) result = 'HUMAN' if result is None else result return ('DIVINE', agent_id, result) elif intent == 'VOTE': agent_id = self.get_agent_id(''.join([w.word for w in sentence])) return ('VOTE', agent_id) elif intent == 'ESTIMATE': agent_id = self.get_agent_id(''.join([w.word for w in sentence])) role = self.get_white_black(sentence) role = self.get_role(sentence) if role is None else role return ('ESTIMATE', agent_id, role) elif intent == 'CO': role = self.get_role(sentence) return ('CO', role) elif intent == 'REQUEST': return ('REQUEST') else: return ('CHAT') def get_agent_id(self, sentence): m = re.search(r'Agent\[(\d+)\]', sentence) if m: return m.group(1) else: return False def get_role(self, sentence): if any(w.word == '人狼' for w in sentence): return '狼' elif any(w.word == '狂人' for w in sentence): return '狂' elif any(w.word == '占い師' for w in sentence): return '占' else: return '村' def get_white_black(self, sentence): if any(w.word == '人狼' for w in sentence): return 'WEREWOLF' elif any(w.word == '村人' for w in sentence): return 'HUMAN' else: return None def normalize(self, sentence): sentence = self.norm_token(sentence) words = self.tokenize(sentence) words = [self.norm_role(w) for w in words] return words def tokenize(self, sentence): result = [] for line in self.tokenizer.parse(sentence).strip().split('\n'): if line == 'EOS': break word, feature = line.split('\t') result.append(Morph(word, feature)) return result def norm_role(self, s): if s.pos == '名詞': if re.match(r'人狼|狼|黒', s.word): s.word = '人狼' elif re.match(r'狂人|狂', s.word): s.word = '狂人' elif re.match(r'占い師|占い|占', s.word): s.word = '占い師' elif re.match(r'村人|人間|白', s.word): s.word = '村人' return s def norm_token(self, sentence): sentence = re.sub(r'[\..。]', '。', sentence) sentence = re.sub(r'[\,,、]', '、', sentence) sentence = re.sub(r'\?|?', '?', sentence) sentence = re.sub(r'\!|!', '!', sentence) return sentence def split_sentence(self, sentence): sentence = self.norm_token(sentence) sentence = re.sub(r'?', '?[SEP]', sentence) sentence = re.sub(r'!', '![SEP]', sentence) sentence = re.sub(r'。', '。[SEP]', sentence) sentence = re.sub(r'\[SEP\]$', '', sentence) sentences = sentence.split('[SEP]') return sentences
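The DIVINE, VOTE and ESTIMATE branches all rely on get_agent_id(), which is a regular-expression match on protocol-style Agent[NN] mentions. A self-contained sketch of that extraction, runnable without MeCab or the pickled models (the sample sentences are made up):

import re

def extract_agent_id(sentence):
    # Return the numeric id of the first 'Agent[NN]' mention, or False if absent,
    # mirroring Recognizer.get_agent_id() above.
    m = re.search(r'Agent\[(\d+)\]', sentence)
    return m.group(1) if m else False

print(extract_agent_id('Agent[03]を占った結果、人狼でした。'))  # -> '03'
print(extract_agent_id('おはようございます。'))                  # -> False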
def iter_oger_nn_test_data(self): """Iter annotations of OGER filtered by the NN on the CRAFT corpus. yields: tuple: (pmid, sspan, espan, n_gram, label, entity ID) """ # perform concept recognition or not cr = int(self.config['other']['concept_recognition']) # load the classifier c = Classifier(self.config_path) c.restore_model() # create a feature extractor fextr = FeatureExtractor(self.config_path) # back mapping from integer to ontology mapping = {int(self.config['classes'][o]): o for o in self.config['classes']} # initialize feature names with empty arrays features = {} for name in c.column_names[:-1]: features[name] = [] # lists of term data tlists = [] # go through all OGER test annotations for pmid, sspan, espan, n_gram, label, entity_id in \ self.iter_oger_test_data(): # get list of feature values for i, val in enumerate(fextr.iter_feature_values(n_gram)): name = c.column_names[i] features[name].append(val) tlists.append((pmid, sspan, espan, n_gram, label, entity_id)) predictions = c.classifier.predict( input_fn=lambda: c.eval_input_fn(features)) # get predictions and zip them with other annotation data for i, pred_dict in enumerate(predictions): # list of (probability, entity type label)-tuples probs = [] # go through the probabilities of the entity types for index, p in enumerate(pred_dict['probabilities']): # append (probability, entity type label) probs.append((p, mapping[index])) # sort tuples by probability in decreasing order probs = sorted(probs, reverse=True) # labels to consider, by default only the one with highest prob labels = [probs[0][1]] # check if difference between the highest and second-highest # probability is smaller than 0.3 threshold = float(self.config['parameters']['threshold']) prob_diff = probs[0][0] - probs[1][0] if prob_diff < threshold: labels.append(probs[1][1]) # go through entity type labels for label in labels: # ignore entity types classified as normal nouns if label != 'nn': # check if concept recognition should be performed if cr: # ignore entity types where OGER and NN give different # labels if label == tlists[i][4]: yield tlists[i] else: yield tlists[i][:4] + (label,) + (tlists[i][-1],)
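The key decision in the loop above is which entity-type labels to keep for each annotation: always the most probable one, plus the runner-up when the probability gap falls below the configured threshold. A standalone sketch of that rule, with a made-up class mapping and probabilities:

def select_labels(probabilities, mapping, threshold=0.3):
    # Sort (probability, label) pairs in decreasing order and keep the top label,
    # adding the second one only if the gap to the best is below the threshold.
    probs = sorted(((p, mapping[i]) for i, p in enumerate(probabilities)), reverse=True)
    labels = [probs[0][1]]
    if probs[0][0] - probs[1][0] < threshold:
        labels.append(probs[1][1])
    return labels

mapping = {0: 'nn', 1: 'chemical', 2: 'organism'}
print(select_labels([0.10, 0.55, 0.35], mapping))  # -> ['chemical', 'organism']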
class WGAN(tf.keras.Model): def __init__(self, batch_size=64): super().__init__(name="WGAN") self.generator = Generator(100, batch_size) self.critic = Critic() self.classifier_m = Classifier() self.train_dataset = None self.test_dataset = None self.train_labels = None self.test_labels = None self.batch_size = batch_size def load_dataset(self, dataset, n_classes): self.train_dataset, self.train_labels, self.test_dataset, self.test_labels = dataset self.num_classes = n_classes @tf.function def predict_batch(self, images, type_class): images_predictions = tf.TensorArray(tf.float32, size=10, dynamic_size=True) ys = tf.TensorArray(tf.float32, size=10, dynamic_size=True) matched_images = tf.TensorArray(tf.float32, size=0, dynamic_size=True) index = 0 for i in tf.range(len(images)): gen_image = data_access.normalize( data_access.de_standardize(images[i])) img = tf.expand_dims(gen_image, axis=0) c_type = self.classifier_m.predict_image(img) w_list = tf.one_hot(c_type, self.num_classes) w_list = tf.reshape(w_list, (w_list.shape[1], )) images_predictions = images_predictions.write(i, w_list) y_list = tf.one_hot(type_class, self.num_classes) ys = ys.write(i, y_list) if (tf.reduce_all(tf.equal(w_list, y_list))): matched_images = matched_images.write(index, images[i]) index += 1 return images_predictions.stack(), ys.stack(), matched_images.stack() @tf.function def gradient_penalty(self, generated_samples, real_images, half_batch): alpha = backend.random_uniform(shape=[half_batch, 1, 1, 1], minval=0.0, maxval=1.0) differences = generated_samples - real_images interpolates = real_images + (alpha * differences) gradients = tf.gradients(self.critic(interpolates), [interpolates])[0] slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), axis=[1, 2, 3])) gradient_p = tf.reduce_mean((slopes - 1.)**2) return gradient_p @tf.function def training_step_critic(self, real_imgs, gen_imgs, real_labels, gen_labels, half_batch): lambda_ = 10.0 with tf.GradientTape() as tape: d_x_real = self.critic(real_imgs, training=True) d_x_gen = self.critic(gen_imgs, training=True) critic_r_loss = self.critic.compute_loss(real_labels, d_x_real) critic_g_loss = self.critic.compute_loss(gen_labels, d_x_gen) total_loss = critic_r_loss + critic_g_loss + ( lambda_ * self.gradient_penalty(gen_imgs, real_imgs, half_batch)) gradients_of_critic = tape.gradient(total_loss, self.critic.trainable_variables) self.critic.backPropagate(gradients_of_critic, self.critic.trainable_variables) return total_loss @tf.function def training_step_generator(self, noise_size, class_type): # prepare points in latent space as input for the generator X_g = self.generator.generate_noise(self.batch_size, noise_size) # create inverted labels for the fake samples y_g = -np.ones((self.batch_size, 1)).astype(np.float32) with tf.GradientTape() as tape: d_x = self.generator(X_g, training=True) # Trainable? 
d_z = self.critic(d_x, training=True) images_predictions, ys, matched_images = self.predict_batch( d_x, class_type) generator_loss = self.generator.compute_loss( d_z, y_g, ys, images_predictions) gradients_of_generator = tape.gradient( generator_loss, self.generator.trainable_variables) self.generator.backPropagate(gradients_of_generator, self.generator.trainable_variables) return generator_loss, matched_images, self.generator( self.generator.seed, training=False) def generate_real_samples(self, n_samples): # choose random instances ix = np.random.randint(0, self.train_dataset.shape[0], n_samples) # select images X = self.train_dataset[ix] # associate with class labels of -1 for 'real' y = -np.ones((n_samples, 1)).astype(np.float32) return X, y @tf.function # use the generator to generate n fake examples, with class labels def generate_fake_samples(self, noise_size, n_samples): # generate points in latent space x_input = self.generator.generate_noise(n_samples, noise_size) # get images generated X = self.generator(x_input, training=True) # associate with class labels of 1.0 for 'fake' y = np.ones((n_samples, 1)).astype(np.float32) return X, y def define_loss_tensorboard(self): logdir = "logs/train/" + datetime.now().strftime("%Y%m%d-%H%M%S") return tf.summary.create_file_writer(logdir=logdir) def define_graph_tensorboard(self): logdir = "logs/graph/" + datetime.now().strftime("%Y%m%d-%H%M%S") return tf.summary.create_file_writer(logdir=logdir) def train_model(self, epoches, n_critic=5, noise_size=100, class_type=5): batch_per_epoch = int(self.train_dataset.shape[0] / self.batch_size) # calculate the number of training iterations n_steps = batch_per_epoch * epoches # calculate the size of half a batch of samples half_batch = int(self.batch_size / 2) sum_writer_loss = self.define_loss_tensorboard() self.classifier_m.load_local_model() avg_loss_critic = tf.keras.metrics.Mean() avg_loss_gen = tf.keras.metrics.Mean() epoch = 0 n_dif_images = 4 directory = 'imgs' start_time = time.time() for i in range(n_steps): for _ in range(n_critic): # get randomly selected 'real' samples X_real, y_real = self.generate_real_samples(half_batch) # generate 'fake' examples X_fake, y_fake = self.generate_fake_samples( noise_size, half_batch) # update critic model weights c_loss = self.training_step_critic(X_real, X_fake, y_real, y_fake, half_batch) avg_loss_critic(c_loss) gen_loss, matched_images, gen_images = self.training_step_generator( noise_size, class_type) avg_loss_gen(gen_loss) data_access.print_training_output(i, n_steps, avg_loss_critic.result(), avg_loss_gen.result()) if ((i % (n_steps / epoches)) == 0): data_access.store_images_seed(directory, gen_images[:n_dif_images], epoch) with sum_writer_loss.as_default(): tf.summary.scalar('loss_gen', avg_loss_gen.result(), step=self.generator.optimizer.iterations) tf.summary.scalar('avg_loss_critic', avg_loss_critic.result(), step=self.critic.optimizer.iterations) epoch += 1 data_access.create_collection(epoches, n_dif_images, directory) print('Time elapse {}'.format(time.time() - start_time)) def generate_images(self, number_of_samples, directory): seed = tf.random.normal([number_of_samples, 100]) images = self.generator(seed) predictions = self.classifier_m.predict_image_vector( data_access.normalize(data_access.de_standardize(images))) data_access.produce_generate_figure('imgs', images, predictions)
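gradient_penalty() above implements the standard WGAN-GP term: score random interpolations between real and generated samples and penalise critic gradients whose L2 norm drifts from 1. The following is a self-contained sketch of that term; the lambda critic and the image shapes are placeholders, not the Critic model used by the class.

import tensorflow as tf

def gradient_penalty(critic, real, fake):
    # Interpolate between real and fake samples and penalise the squared
    # deviation of the critic's gradient norm from 1.
    alpha = tf.random.uniform([real.shape[0], 1, 1, 1], 0.0, 1.0)
    interpolates = real + alpha * (fake - real)
    with tf.GradientTape() as tape:
        tape.watch(interpolates)
        scores = critic(interpolates)
    grads = tape.gradient(scores, interpolates)
    slopes = tf.sqrt(tf.reduce_sum(tf.square(grads), axis=[1, 2, 3]))
    return tf.reduce_mean((slopes - 1.0) ** 2)

toy_critic = lambda x: tf.reduce_sum(x, axis=[1, 2, 3])   # stand-in for Critic()
real = tf.random.normal([4, 28, 28, 1])
fake = tf.random.normal([4, 28, 28, 1])
print(gradient_penalty(toy_critic, real, fake))  # roughly (28 - 1)^2 for this toy critic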
def __init__(self, microDataLoc, clusterNum=1, macroDataLoc="data/clusterData.txt"):
    Classifier.__init__(self, microDataLoc, clusterNum, macroDataLoc)
def start_rules_training(self):
    # Generate the tags for the value ranges
    gen_tags = gt(self.df)
    tags_ranges = gen_tags.set_tags()
    # Split the dataset into 5 random partitions
    parts_gen = Part(self.df)
    partition_set = parts_gen.gen_partition_set()
    # Initialize the best rules set using the full dataset
    best_rulesset = self.get_initial_rules(self.df, tags_ranges)
    # Train the rules with all possible combinations of training and test partitions
    for i in range(0, len(partition_set) - 1):
        # Select the partition for the test set, using the index of the loop
        test_set = partition_set[i]
        # Select the partitions for the training set, removing the test partition
        # from a copy of the partitions list
        training_set = partition_set.copy()
        training_set.pop(i)  # Remove test partition from training_set
        # Fuzzify the data from the test set
        fuzzifier = FuzGen(test_set)
        test_df = fuzzifier.fuzzify_data(tags_ranges)
        '''
        Classify each training partition with the current rules set to refine it.
        In each iteration, accumulate the matched rules onto the previous rules set;
        this lets the best rules, the ones matched most often, stand out.
        '''
        for training_df in training_set:
            # Fuzzify training partition
            fuzzifier = FuzGen(training_df)
            fuzzy_df = fuzzifier.fuzzify_data(tags_ranges)
            # Apply the current rules set to the training partition
            classifier = Classifier(fuzzy_df, best_rulesset)
            classifier.classify_dataset()
            # Check results of classification: matched rules and positives rate
            TP_value, matched_rules = classifier.verify_classification()
            # Concatenate the matched rules to the current best rules set
            best_rulesset = pd.concat([best_rulesset, matched_rules])
        '''
        Once the matched rules over the initial set have been gathered, test the
        rules set on the test partition. Before that, filter the set so that only
        one rule is kept per antecedent set, based on the matches from training.
        '''
        # Filter the best rules, removing repeated antecedents
        best_rulesset = self.reduce_rules(best_rulesset, tags_ranges)
        # Try to classify the test set with the rules set obtained from training
        classifier = Classifier(test_df, best_rulesset)
        classifier.classify_dataset()
        # Check classification results
        TP_value, matched_rules = classifier.verify_classification()
        # Calculate accuracy as the ratio of true positives to the length of the test set
        accuracy = TP_value / len(test_df)
        print(f"Test {i} accuracy: {accuracy}")
        print(f"Length of minimal rules set: {len(best_rulesset)}")
    return best_rulesset
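The cross-validation scheme above rotates which partition plays the test set while the remaining partitions are used for training. A minimal sketch of that rotation, with plain lists standing in for the DataFrame partitions; note that, as written above, the loop runs to len(partition_set) - 1, so the final partition never serves as the test set.

partition_set = ['part_0', 'part_1', 'part_2', 'part_3', 'part_4']

for i in range(len(partition_set)):
    test_set = partition_set[i]
    # Everything except the i-th partition becomes the training material.
    training_set = partition_set[:i] + partition_set[i + 1:]
    print(test_set, training_set)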
time_budget = max_time
overall_time_budget = overall_time_budget + time_budget
vprint(verbose, "[+] Cumulated time budget (all tasks so far) %5.2f sec" % (overall_time_budget))
# We do not add the time left over from the previous dataset: time_budget += time_left_over
vprint(verbose, "[+] Time budget for this task %5.2f sec" % time_budget)
time_spent = time.time() - start
vprint(verbose, "[+] Remaining time after reading data %5.2f sec" % (time_budget - time_spent))
if time_spent >= time_budget:
    vprint(verbose, "[-] Sorry, time budget exceeded, skipping this task")
    execution_success = False
    continue

# ========= Creating a model, knowing its assigned task from D.info['task'].
# The model can also select its hyper-parameters based on other elements of info.
vprint(verbose, "======== Creating model ==========")
M = Classifier()

# ========= Reload trained model if it exists.
vprint(verbose, "**********************************************************************")
vprint(verbose, "****** Attempting to reload model (from res/) to avoid training ******")
vprint(verbose, "**********************************************************************")
you_must_train = 1
modelname = os.path.join(res_dir, basename)
if os.path.isfile(modelname + '_model.pickle'):
    M = M.load(modelname)
    you_must_train = 0
    vprint(verbose, "[+] Model reloaded, no need to train!")

# ========= Train if needed only.
if you_must_train:
    vprint(verbose, "======== Trained model not found, proceeding to train!")
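The fragment above reloads a previously saved model when one exists on disk and only trains otherwise. A compressed, hypothetical sketch of that reload-or-train pattern using plain pickle; load_or_train and the fit call are illustrative names, not the starting kit's actual API.

import os
import pickle

def load_or_train(model_path, make_model, train_fn):
    # Reuse a previously pickled model if one exists; otherwise train and cache it.
    if os.path.isfile(model_path):
        with open(model_path, 'rb') as f:
            return pickle.load(f), False          # reloaded, no training needed
    model = make_model()
    train_fn(model)
    with open(model_path, 'wb') as f:
        pickle.dump(model, f)
    return model, True                            # freshly trained

# Hypothetical usage:
# M, trained = load_or_train(modelname + '_model.pickle', Classifier,
#                            lambda m: m.fit(X_train, y_train))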
ha='center', color=color) plt.xlabel('Predicted Value') plt.xticks(range(CLASSES)) plt.ylabel('Actual Value') plt.yticks(range(CLASSES)) plt.colorbar() plt.tight_layout() plt.savefig(str(plot_name), bbox_inches='tight', pad_inches=0) plt.close() # Seed for consistency np.random.seed(SEED) # Load best weights back up and make confusion matrices classifier = Classifier(input_size=INPUTS) weight_files = sorted(CLASS_MODEL_DIR.iterdir()) for weight_file in weight_files[:-1]: classifier.addLayer(file_name=weight_file, output=False) classifier.addLayer(file_name=weight_files[-1], output=True) train_conf_title = 'Train Confusion Matrix' makeConfMat(classifier, train_data, train_labels, CLASS_TRAIN_CONF, title=train_conf_title) test_conf_title = 'Test Confusion Matrix' makeConfMat(classifier, test_data, test_labels, CLASS_TEST_CONF,
def run_iteration(iteration, hash_map): lbp = LocalBinaryPatterns(24, 8) data = [] labels = [] #Finding all images images = [os.path.join(root, name) for root, dirs, files in os.walk("../training_images") for name in files if name.endswith((".jpeg", ".jpg"))] #Spliting it into training and testing groups training, testing = train_test_split(images, test_size = 0.25) #Training Phase for imagePath in training: #Load the image, convert it to grayscale, and compute LBP image = cv2.imread(imagePath) gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if imagePath in hash_map: hist = hash_map[imagePath] else: hist = lbp.compute(gray) hash_map[imagePath] = hist print str(iteration) + " DEBUG(Training): Computed LBP Histogram for " + imagePath #Plotting histogram if needed #plt.bar(bin_edges[:-1], hist, width = 1) #plt.xlim(min(bin_edges), max(bin_edges)) #plt.show() #Extract the label from the image path, then update the label and data lists labels.append(imagePath.split("/")[-2]) data.append(hist) #Train classifier classifier = Classifier("SVM") print "\n\n" + str(iteration) + " DEBUG: Training Classifier" classifier.train(data, labels) print "\n\n" + str(iteration) + " DEBUG: Trained Classifier\n\n" #Testing Phase data = [] labels = [] for imagePath in testing: #Load the image, convert to grayscale, describe it and classify it image = cv2.imread(imagePath) gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if imagePath in hash_map: hist = hash_map[imagePath] else: hist = lbp.compute(gray) hash_map[imagePath] = hist print str(iteration) + " DEBUG(Testing): Computed LBP Histogram for " + imagePath data.append(hist) labels.append(imagePath.split("/")[-2]) print "\n\n" + str(iteration) + " DEBUG: Forming predictions" predictions = classifier.predict(data) counter = 0 print "\n\n" + str(iteration) + " DEBUG: Printing predictions\n\n" for index, prediction in enumerate(predictions): print "Name -> " + testing[index] + " Actual -> " + labels[index] + " Prediction -> " + prediction if labels[index] == prediction: counter = counter + 1 accuracy = (float(counter)/float(len(predictions))) * 100.0 print "\n\n" + str(iteration) + " The Classifier Accuracy was " + str(accuracy) + "%" return accuracy
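run_iteration() receives a shared hash_map so that an image's LBP histogram is computed at most once across repeated train/test splits. A hypothetical driver that reuses one cache over several runs and reports the mean accuracy; the run count of 5 is arbitrary.

hash_map = {}
accuracies = [run_iteration(i, hash_map) for i in range(5)]
mean_accuracy = sum(accuracies) / len(accuracies)
print("Mean accuracy over {} runs: {:.2f}%".format(len(accuracies), mean_accuracy))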
#Load the image, convert it to grayscale, and compute LBP image = cv2.imread(imagePath) gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) hist = lbp.compute(gray) #Plotting histogram if needed #plt.bar(bin_edges[:-1], hist, width = 1) #plt.xlim(min(bin_edges), max(bin_edges)) #plt.show() #Extract the label from the image path, then update the label and data lists labels.append(imagePath.split("/")[-2]) data.append(hist) #Train classifier classifier = Classifier("SVM") classifier.train(data, labels) #Testing Phase data = [] testing = [ os.path.join(root, name) for root, dirs, files in os.walk("../testing_images") for name in files if name.endswith((".jpeg", ".jpg")) ] for imagePath in testing: #Load the image, convert to grayscale, describe it and classify it image = cv2.imread(imagePath) gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) hist = lbp.compute(gray)
"nubank", "ciw", "cef-savings", "itau-cc", "itau-savings", "bradesco-savings", "generic" ], required=True, help="Set account that will be used.") parser.add_argument("-af", "--account_src_file", required=True, help="Set account source to integrate") parser.add_argument("-acf", "--account_from", help="Define from import") parser.add_argument("-act", "--account_to", help="Define to import") parser.add_argument( "-cl", "--classifier", default=None, choices=Classifier.AVAILABLE_STRATEGIES().keys(), help="Define classifier that must be used to import data") args = parser.parse_args() if args.verbose: loglevel = logging.DEBUG logformat = Util.LOG_FORMAT_DEBUG elif args.quiet: loglevel = logging.WARN # TODO log to file in this case logformat = Util.LOG_FORMAT_FULL else: loglevel = logging.INFO logformat = Util.LOG_FORMAT_SIMPLE # TODO config logger by dictnoray - https://realpython.com/python-logging/
import yaml

import common
from classifier import Classifier
from detector import Detector
from pipeline import *
from steps import *

with open(common.CONFIG_PATH) as f:
    config = yaml.safe_load(f)

main_pipeline = Pipeline([
    Input("input"),
    DetectingSingleFrameStep(
        "detector",
        Detector(snapshot_path=config['paths']['detector']),
        EXTRACTORS[config["extractor"]]()),
    ClassifyingBoxesStep(
        "classifier",
        model=Classifier(snapshot_path=config['paths']['classifier']),
        input_width=common.CLASSIFIER_INPUT_WIDTH,
        input_height=common.CLASSIFIER_INPUT_HEIGHT),
    DecodeClassesStep(
        "decoder",
        label_encoder=common.unpickle_data(config['paths']['label_encoder'])),
])

visualisation_pipeline = main_pipeline + [
    VisualiseStep("visualise"),
    ShowVisualisation("showtime")
]

without_showing = main_pipeline + [VisualiseStep("visualise")]
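The main_pipeline + [...] expressions suggest that Pipeline composes like a list of steps, each feeding its output to the next. A minimal, hypothetical sketch of that idea (this is not the project's actual Pipeline implementation):

class MiniPipeline(list):
    # Toy stand-in for Pipeline: run() threads a payload through every step's
    # process() method, and '+' appends extra steps while preserving the type.
    def run(self, payload):
        for step in self:
            payload = step.process(payload)
        return payload

    def __add__(self, extra_steps):
        return MiniPipeline(list(self) + list(extra_steps))

class Upper:
    def process(self, text):
        return text.upper()

class Exclaim:
    def process(self, text):
        return text + '!'

print(MiniPipeline([Upper()]).run('hello'))                  # HELLO
print((MiniPipeline([Upper()]) + [Exclaim()]).run('hello'))  # HELLO!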