def __init__(self, fname, *args, **kargs):
        Classifier.__init__(self, fname, *args, **kargs)

        # sometimes a threshold value is trained during Bayesian
        # classification to avoid classifying too many 'documents' as
        # one kind or the other
        self.thresholds = [1.0, 1.0]
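
A minimal sketch of how such per-class thresholds are typically applied at classification time; prob(), categories() and get_threshold() are assumed helpers that this snippet does not show:

def classify(self, item, default=None):
    # score every category and remember the best one
    probs, best, max_prob = {}, default, 0.0
    for cat in self.categories():
        probs[cat] = self.prob(item, cat)
        if probs[cat] > max_prob:
            max_prob, best = probs[cat], cat
    # accept 'best' only if it beats every rival by its threshold factor
    for cat in probs:
        if cat == best:
            continue
        if probs[cat] * self.get_threshold(best) > probs[best]:
            return default
    return best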
Example #2
def main():
    parser = argparse.ArgumentParser(description='Music classifier.\nReads the training data from a file and uses evolutionary algorithms to create and improve the classification rules.')
    parser.add_argument('-d', '--data', help='File containing the source data for the classifier.')
    args = vars(parser.parse_args())

    """
    Los valores default son:
        tamaño discretizacion - 100
        poblacion de generacion - 10
        min fitness para terminar - 0.9
        numero a seleccionar - 4
        porcentaje de mutacion - 0.05
        maximo de generaciones - 10000
        tipo de seleccion - ROULETTE_WHEEL_SELECTION
    """
    defaults = [100, 10, 0.9, 4, 0.05, 10000, selection.ROULETTE_WHEEL_SELECTION]

    classifier = Classifier(args['data'], discrete_intervals=defaults[0], size_rule_generation=defaults[1], filter_list=["skewness", "spectral_rolloff", "energy", "sv", "spread", "centroid", "obsi", "kurtosis"], log_results=True)
    start = time.clock()
    best_results = classifier.train(req_min_fitness=defaults[2], gen_select=defaults[3], mutation_prob=defaults[4], limit_generations=defaults[5])
    duration = (time.clock() - start)*1000
    print "Duration\t", duration, "ms"
    print "Training endend."
    print "Best results:", ', '.join([str(key) + " fitness: " + str(value['fitness']) for key, value in best_results.items()])
    print "Testing:"
    classifier.test()
    print "Testing ended."
Example #3
def main():
	me=Classifier()
	feature_counter=Counter()
	feature_set=pickle.load(open('validation_set.pkl', 'rb'))
	feature_set_labels=[]
	for tweet, rating in feature_set:
		print rating
		try:
			float(rating)
		except ValueError:
			continue
		if float(rating)>0:
			label='positive'
		elif float(rating)<0:
			label='negative'
		else:
			label='neutral'
		feature_set_labels.append((tweet, label))
	feature_list=chain.from_iterable([word_tokenize(process_tweet(tweet)) for tweet, sentiment in feature_set_labels])
	for feat in feature_list:
		feature_counter[feat]+=1
	me.feature_list=[feat for feat, count in feature_counter.most_common(1000)]
	ts=[(me.extract_features(tweet), label) for tweet, label in feature_set_labels]
	print 'training Maxent'
	me.classifier=MaxentClassifier.train(ts)
	return me
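
Classifier.extract_features is not shown in these snippets; a common NLTK-style implementation, sketched here as an assumption, marks the presence of each retained feature:

def extract_features(self, tweet):
    # hypothetical sketch: boolean bag-of-words over self.feature_list
    tokens = set(word_tokenize(process_tweet(tweet)))
    return dict((feat, feat in tokens) for feat in self.feature_list)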
Example #4
def main():
  dbinfo = recover()
  conn = MySQLdb.connect(**dbinfo)

  cur = conn.cursor()

  #Learn
  sql = "SELECT id,article_text,trainpos,trainneg,trainneutral FROM articles WHERE trainset=1 AND (trainpos>0 OR trainneg>0 OR trainneutral>0)"
  cur.execute(sql)
  a = Learner()
  for aid,article_text,trainpos,trainneg,trainneutral in cur.fetchall():
    aid = int(aid)
    items = [ (1, int(trainpos)),(0, int(trainneutral)),(-1, int(trainneg)) ]
    classification = max(items, key=lambda x : x[1])[0]
    a.add_string(article_text, classification)
  a.train()

  #Predict
  sql = "SELECT id,article_text FROM articles"
  cur.execute(sql)
  b = Classifier(a)
  for aid,article_text in cur.fetchall():
    aid = int(aid)
    classification = b.classify(article_text)
    sql = "UPDATE articles SET score=%s WHERE id=%s"
    args = [classification,aid]
    cur.execute(sql,args)
    print aid,classification

  conn.commit()
Example #5
def eval_classifier(classifierToUse, featuresToUse, testOrTrain="train"):

    print("Chosen feature: {0}".format(featuresToUse) )
    print("Chosen classifier: {0}".format(classifierToUse))

    fe = FeatureExtractor(featuresToUse)
    dataset = DataSet(fe)
    classifier = Classifier()
    evaluate = Evaluation()

    print "test or Train %s" % testOrTrain
    for feature_class, files in getTestData(testOrTrain).items():
        print "%s" % testOrTrain
        for f in files:
            dataset.addFile(feature_class, f)

    print "Dataset initialized"
    print_class_stats(dataset.classes)

    print "Test set created."
    a_train, a_test, c_train, c_test = train_test_split(dataset.featureVector, dataset.classes, test_size=0.9)
    
    c_pred = classifier.classification(a_train,a_test,c_train,c_test,classifierToUse)
    
    evaluate.evaluate(c_pred,c_test,featuresToUse,classifierToUse)
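
A hypothetical invocation of eval_classifier; the feature and classifier names below are illustrative, not taken from the source:

eval_classifier("svm", "mfcc", testOrTrain="train")

Example #6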
def create_predict(HudongItem_csv):
	# read content from neo4j
	db = Neo4j()
	db.connectDB()
	data_set = db.getLabeledHudongItem('labels.txt')
	classifier = Classifier('wiki.zh.bin')
	classifier.load_trainSet(data_set)     
	classifier.set_parameter(weight=[1.0, 3.0, 0.2, 4.0, 0],k=10)
	predict_List = readCSVbyColumn(HudongItem_csv, 'title')
	file_object = open('predict_labels2.txt','a')
	
	count = 0
	vis = set()
	for p in predict_List:
		cur = HudongItem(db.matchHudongItembyTitle(p))
		count += 1
		title = cur.title
		if title in vis:
			continue
		vis.add(title)
		label = classifier.KNN_predict(cur)
		print(str(title)+" "+str(label)+": "+str(count)+"/"+str(len(predict_List)))
		file_object.write(str(title)+" "+str(label)+"\n")
		
	file_object.close()
	
#create_predict('hudong_pedia2.csv')
	
Example #7
  def __init__(self, D, H, W, K, iternum):
    Classifier.__init__(self, D, H, W, K, iternum)
    self.L = 100 # size of hidden layer

    """ Layer 1 Parameters """
    # weight matrix: [M * L]
    self.A1 = 0.01 * np.random.randn(self.M, self.L)
    # bias: [1 * L]
    self.b1 = np.zeros((1,self.L))

    """ Layer 3 Parameters """
    # weight matrix: [L * K]
    self.A3 = 0.01 * np.random.randn(self.L, K)
    # bias: [1 * K]
    self.b3 = np.zeros((1,K))

    """ Hyperparams """
    # learning rate
    self.rho = 1e-2
    # momentum
    self.mu = 0.9
    # regularization strength
    self.lam = 0.1
    # velocity for A1: [M * L]
    self.v1 = np.zeros((self.M, self.L))
    # velocity for A3: [L * K] 
    self.v3 = np.zeros((self.L, K))
    return
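
The velocity buffers above imply a classic momentum update in the training step; a minimal sketch (the gradient computation itself is not shown in this snippet):

def momentum_step(A, v, grad, rho=1e-2, mu=0.9):
    # v <- mu * v - rho * dL/dA ;  A <- A + v
    v = mu * v - rho * grad
    return A + v, v

Example #8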
def runNeuralNetwork(train, test, batchSize, classNum, hLayer=None, mode=None, momentumFactor=0.0):
    """
    A function that calls the classifier to train a learning model.
    Args:
        train: training examples (numpy)
        test: testing examples (numpy)
        batchSize: the number of training example for each iteration
        classNum: the number of classes
        hLayer: number of the hidden layer nodes (list)
        mode: weight initializing mode
        momentumFactor: momentum factor
    """
    print ""
    print "Neural Network =============================="
    print " - number of hidden layer nodes:",
    if hLayer is not None:
        print hLayer
    else:
        print " default (one hidden layer with node number = 2 * feature number)"

    print " - weight initialization mode:",
    if mode is not None:
        print mode
    else:
        print "default"

    print " - momentum factor", momentumFactor

    nn = Classifier("neural_network", hidden_layer=hLayer, weightInitMode=mode, momentumFactor=momentumFactor)
    nn.train(train, test, classNum, batchSize)
    nn.test(test, "test")
def build_model_mnist():

    # CNN
    filter_size = (5, 5)
    activation = Rectifier().apply
    pooling_size = (2, 2)
    num_filters = 50
    layer0 = ConvolutionalLayer(activation=activation, filter_size=filter_size, num_filters=num_filters,
                              pooling_size=pooling_size,
                              weights_init=Uniform(width=0.1),
                              biases_init=Uniform(width=0.01), name="layer_0")

    filter_size = (3, 3)
    activation = Rectifier().apply
    num_filters = 20
    layer1 = ConvolutionalLayer(activation=activation, filter_size=filter_size, num_filters=num_filters,
                              pooling_size=pooling_size,
                              weights_init=Uniform(width=0.1),
                              biases_init=Uniform(width=0.01), name="layer_1")

    conv_layers = [layer0, layer1]
    convnet = ConvolutionalSequence(conv_layers, num_channels= 1,
                                    image_size=(28, 28))

    convnet.initialize()
    output_dim = np.prod(convnet.get_dim('output'))
    mlp = MLP(activations=[Identity()], dims=[output_dim, 10],
                        weights_init=Uniform(width=0.1),
                        biases_init=Uniform(width=0.01), name="layer_2")
    mlp.initialize()

    classifier = Classifier(convnet, mlp)
    classifier.initialize()
    return classifier
Example #10
def average_multiple_runs(num_runs, options, args):
    for num, option in enumerate(options):
        print "Running", num_runs, "iterations with options:", option
        list_best_results = []
        list_test_results = []
        list_correct_results = []
        for i in range(num_runs):
            print "Running #" + str(i + 1)
            classifier = Classifier(args['data'], discrete_intervals=option[0], size_rule_generation=option[1], filter_list=["skewness", "spectral_rolloff", "energy", "sv", "spread", "centroid", "obsi", "kurtosis"], log_results=False)
            best_results = classifier.train(req_min_fitness=option[2], gen_select=option[3], mutation_prob=option[4], limit_generations=option[5], selection_type=option[6])
            test_results, correct_results = classifier.test()
            list_best_results.append(best_results)
            list_test_results.append(test_results)
            list_correct_results.append(correct_results)
        print "Results for option: ", option
        print "run\ttype\tgen\tfitness"
        for i, results in enumerate(list_best_results):
            for rule, result in results.items():
                print str(i + 1) + "\t" + rule[:7] + "\t" + str(result["generation"]) + "\t" + str(result["fitness"])

        print "run\ttype\tavg correct rules"
        for i, results in enumerate(list_test_results):
            for avg_map in results:
                print str(i + 1) + "\t" + avg_map.keys()[0][:7] + "\t" + str(avg_map[avg_map.keys()[0]])

        print "run\ttype\tavg correct results"
        for i, results in enumerate(list_correct_results):
            for avg_map in results:
                print str(i + 1) + "\t" + avg_map.keys()[0][:7] + "\t" + str(avg_map[avg_map.keys()[0]])
Example #11
    def run(self):
        """
        Function: Run
        -------------
        This function will evaluate your solution! You do not need to
        write any code in this file, however you SHOULD understand this
        function!
        """
        print "Running the full pipeline!"
        K=25
        trainImages = util.loadTrainImages()[:1000]
        testImages = util.loadTestImages()

        classifier = Classifier()

        print 'Training..........'
        classifier.train(trainImages, K)

        trainPredictions = classifier.test(trainImages)
        trainAccuracy = self.evaluate(trainPredictions, trainImages)

        print 'Testing...........'
        testPredictions = classifier.test(testImages)
        testAccuracy = self.evaluate(testPredictions, testImages)

        print 'All done. Here is your summary:'
        self.reportAccuracy(trainAccuracy, 'Train Accuracy')
        self.reportAccuracy(testAccuracy, 'Test Accuracy')
Example #12
def GetNewArticles(request):
    # Get the articles from RSS
    # aggregator = NewsAggregator()
    # list_of_articles = aggregator.feedreader()
    classifier = Classifier("filename.pkl")
    # Predict
    list_of_classes = []
    # with open("articles_dump", "wb") as dump:
    #     pickle.dump(list_of_articles, dump, pickle.HIGHEST_PROTOCOL)
    with open("articles_dump") as dump:
        list_of_articles = pickle.load(dump)
    for article in list_of_articles:
        list_of_classes.append(article["content"])
    # print list_of_classes
    res = classifier.predict(np.asarray(list_of_classes))

    for i in range(0, len(list_of_articles)):
        if res[i] == 1:
            cat = "Sports"
        elif res[i] == 2:
            cat = "Economy_business_finance"
        elif res[i] == 3:
            cat = "Science_technology"
        else:
            cat = "Lifestyle_leisure"
        element = list_of_articles[i]
        list_of_articles[i]["category"] = cat
        article = Article(article_title=element["title"], article_content=element["content"], article_category=cat)
        article.save()
    json_object = json.dumps(list_of_articles)
    return HttpResponse(json_object)
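
The if/elif ladder above is equivalent to a table lookup; a sketch:

CATEGORY_MAP = {1: "Sports", 2: "Economy_business_finance", 3: "Science_technology"}
cat = CATEGORY_MAP.get(res[i], "Lifestyle_leisure")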
Example #13
def test_classify_by_randomforest():
    stock_d = testdata()
    ti = TechnicalIndicators(stock_d)

    filename = 'test_N225_randomforest.pickle'
    clffile = os.path.join(os.path.dirname(
                           os.path.abspath(__file__)),
                           '..', 'clf',
                           filename)

    if os.path.exists(clffile):
        os.remove(clffile)

    clf = Classifier(filename)
    ti.calc_ret_index()
    ret = ti.stock['ret_index']

    train_X, train_y = clf.train(ret, classifier="Random Forest")

    eq_(filename, os.path.basename(clf.filename))

    r = round(train_X[-1][-1], 5)
    expected = 1.35486
    eq_(r, expected)

    r = round(train_X[0][0], 5)
    expected = 1.08871
    eq_(r, expected)

    expected = 14
    r = len(train_X[0])
    eq_(r, expected)

    expected = 120
    r = len(train_X)
    eq_(r, expected)

    expected = [1, 0, 0, 0, 1, 1, 0, 0, 0, 0,
                0, 0, 1, 0, 0, 1, 0, 1, 0, 1,
                1, 0, 1, 1, 1, 1, 1, 0, 1, 0,
                1, 1, 1, 1, 0, 1, 0, 1, 1, 0,
                1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
                0, 0, 0, 1, 0, 0, 1, 1, 1, 1,
                1, 0, 1, 0, 0, 0, 0, 0, 0, 1,
                1, 1, 0, 0, 1, 0, 1, 1, 0, 1,
                1, 0, 1, 1, 0, 1, 0, 0, 1, 0,
                1, 1, 0, 0, 1, 0, 1, 0, 1, 1,
                1, 1, 1, 0, 1, 1, 1, 0, 0, 1,
                1, 0, 0, 1, 1, 1, 0, 1, 1, 0]

    for r, e in zip(train_y, expected):
        eq_(r, e)

    test_y = clf.classify(ret)
    assert(test_y[0] == 0 or test_y[0] == 1)

    if os.path.exists(clffile):
        os.remove(clffile)
Example #14
def main(mode='test'):
    cl = Classifier()
    cl.create_db('bunyk.db')

    if mode == 'test':
        test(cl)
    else:
        train(cl, 'http://bunyk.wordpress.com')
Example #15
    def setUp(self):
        text = u"Comment Google classe les pages Internet"

        c = Classifier(CleanTextUtil("french"))
        c.add_text(text)

        self.dictionary_db = c.dictionary_db
        self.vi = VectorItem("googl", "1")
Example #16
def askfunc():
    options=\
    {
        "login": False,
        "username": "",
        "status": 0
    }
    error = None
    if 'username' in session:
        options["login"]=True
        options["username"]=session["username"]


    else:
        return render_template('errorpage.html', error = error)

    if request.method=='POST':
        op=request.form['op']
        print ("enter the post")
        if op == "submit":

            newpic = request.files['photo']
            picAdded = True
            cur = mysql.connection.cursor()
            cur.execute("select userid from user where username = '" + session["username"] + "'")
            useridresult = cur.fetchall()
            currentuserid = str(useridresult[0][0])
            pic_location = "static/pictures/" + currentuserid + "/" + newpic.filename
            newpic.save(pic_location)

            title = request.form['title']
            ori = request.form['ori']
            ori = str(ori)

            print ori
            if ori == "I dont know": #TODO: update with proper default value
                print ("classifier entered")
                cc = Classifier(pic_location)
                ori = cc.classify_text()
                ori = ori.title()
            tar = request.form['tar']
            #if tar == 'NA'
            des = request.form['des']
            cur = mysql.connection.cursor()
            sql = "insert into post (title, description, origin, target, pathtophoto, userid) values ('" + title + "', '" + des + "', '" + ori + "', '" + tar + "', '" + newpic.filename + "', " + currentuserid + ");"
            cur.execute(sql)
            print sql
            cur.execute("commit")


        return render_template('redirect.html',  error=error)


    return render_template('ask.html',  error=error, **options)
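
The string-concatenated INSERT above is vulnerable to SQL injection; MySQLdb supports parameter binding, so a safer equivalent would be (a sketch, same column order as above):

sql = ("insert into post (title, description, origin, target, pathtophoto, userid) "
       "values (%s, %s, %s, %s, %s, %s)")
cur.execute(sql, (title, des, ori, tar, newpic.filename, currentuserid))
mysql.connection.commit()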
Example #17
def test_combinations(args, graph=False):
    py = plotly.plotly(username='******', key='uzkqabvlzm', verbose=False)
    options = [100, 10, 0.9, 4, 0.05, 10000]
    features = ["skewness", "spectral_rolloff", "energy", "sv", "spread", "centroid", "zcr", "obsi", "kurtosis"]
    electronic_y = []
    classical_y = []
    categories = []

    print '\t'.join([feature[:2] for feature in features] + ["meta", "acou", "regg", "elec", "class"])

    for i in range(1, len(features) + 1):
        combinations = [list(comb) for comb in itertools.combinations(features, i)]
        for comb in combinations:
            comb_name = ', '.join(comb)
            classifier = Classifier(args['data'], discrete_intervals=options[0], size_rule_generation=options[1], filter_list=comb)
            top_fitness = classifier.train(req_min_fitness=options[2], gen_select=options[3], mutation_prob=options[4], limit_generations=options[5])
            for feature in features:
                if feature in comb:
                    sys.stdout.write("X\t")
                else:
                    sys.stdout.write("\t")
            sys.stdout.write(str(top_fitness['metal']["fitness"])[:4] + "\t")
            sys.stdout.write(str(top_fitness['acoustic']["fitness"])[:4] + "\t")
            sys.stdout.write(str(top_fitness['reggae']["fitness"])[:4] + "\t")
            sys.stdout.write(str(top_fitness['electronic']["fitness"])[:4] + "\t")
            sys.stdout.write(str(top_fitness['classical']["fitness"])[:4] + "\n")

            if graph:
                print "Training ended\nFinal fitness:", top_fitness
                electronic_y.append(top_fitness['metal'])
                classical_y.append(top_fitness['classical'])
                categories.append(comb_name)

                if len(categories) > 20:
                    electronic = {
                        "name": "Metal",
                        "x": categories,
                        "y": electronic_y,
                        "type": "bar"
                    }

                    classical = {
                        "name": "Classical",
                        "x": categories,
                        "y": classical_y,
                        "type": "bar"
                    }

                    layout = {
                        "barmode": "group",
                        'xaxis': {'type': 'combination'},
                        'categories': categories
                    }
                    response = py.plot([electronic, classical], layout=layout)
                    print response['url']
                    electronic_y = []
                    classical_y = []
                    categories = []
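
Note that the loop above trains one classifier per non-empty subset of the 9 features, i.e. 2**9 - 1 = 511 training runs:

import itertools
total = sum(1 for i in range(1, 10) for _ in itertools.combinations(range(9), i))
assert total == 2**9 - 1 == 511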
Example #18
    def cl_button_clicked_cb(self, button):
        """Classify button callback
        :param button: signal came from this button
        """
        if not len(self.sel_files):
            return
        self.counter = -1
        for row in self.sel_files:
            Classifier.classify(self.all_files[row], MainWindow.SR, row, self.update_classify_progress_cb)
Example #19
def test_classify():

    proxy = ReviewsMongoProxy("tripadvisor_train")
    review = proxy.find_review_by_id(proxy.next_random_review_id())

    classifier = Classifier("../tripadvisor/aspect_nltk_nb.pkl")
    classifier.classify(review)

    print_review(review)
Example #20
def classifier(search_query):
    cls = Classifier(' '.join(search_query.split('_')))
    classified_output = cls.classify()

    if classified_output is not None and len(classified_output) > 0:
        with open("output/" + search_query + ".json", "w") as out:
            out.write(json.dumps(classified_output))

        return json.dumps({"query": search_query, "status": "Success"})
    else:
        return json.dumps({"query": search_query, "status": "Failed"})
def main(c = "decision_tree", option = "IG", dataset = "iris", ratio = 0.8):

	classifier_types = {0: "decision_tree", 1: "naive_bayes", 2: "neural_net"}
	options = {0:["IG", "IGR"], 1:["normal"], 2:["shallow", "medium"]}

	ratio = float(ratio)

	if dataset == "monks":
		(training, test) = load_data.load_monks(ratio)
	elif dataset == "congress":
		(training, test) = load_data.load_congress_data(ratio)
	elif dataset == "iris":
		(training, test) = load_data.load_iris(ratio)
	else:
		print "Error: Cannot find dataset name."
		return

	print "Training... Please hold."
	# classifier_types = {0: "decision_tree", 2: "neural_net"}
	# options = {0:["IG", "IGR"], 2:["shallow", "medium"]}
	# (training, test) = load_data.load_iris(0.8)
	# nn_classifier = Classifier(classifier_type="neural_net", option = "medium")
	# nn_classifier.train(training)
	# nn_classifier.test(test)

	# print test
	# (training, test) = load_data.load_congress_data(0.8)
	# print test
	# (training, test) = load_data.load_monks(1)
	# print test	

	# (training, test) = load_data.load_iris(0.8)
	# print training
	# "option = IG/IGR"
	# dt_classifier = Classifier(classifier_type="decision_tree", weights=[], option="IG")
	# dt_classifier.train(training)
	# dt_classifier.test(test)
	# for i, c in classifier_types.iteritems():
	# 	for option in options[i]:
	print "                                                                 "
	print "================================================================="
	print "Dataset    = ", dataset
	print "Classifier = ", c
	print "Option     = ", option
	classifier = Classifier(classifier_type=c, weights = [], option = option)
	classifier.train(training)
	classifier.test(test)
	print "================================================================="
	print "                                                                 "
	# option value could be either shallow(3 layers) or medium(5)
	# nn_classifier = Classifier(classifier_type="neural_net", option = "medium")
	# nn_classifier.train(training)
	# nn_classifier.test(test)
	return 
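
A hypothetical call matching the signature above (argument values are illustrative):

main(c="naive_bayes", option="normal", dataset="congress", ratio=0.8)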
Example #22
def test_performance(args, num_runs):
    #Features:
    features = ["skewness", "spectral_rolloff", "energy", "sv", "spread", "centroid", "zcr", "obsi", "kurtosis"]
    option = [100, 10, 0.9, 2, 0.05, 1000, selection.ROULETTE_WHEEL_SELECTION]
    for i in range(1, len(features) + 1):
        print "Num of features:", i
        for num_run in range(num_runs):
            classifier = Classifier(args['data'], discrete_intervals=option[0], size_rule_generation=option[1], filter_list=features[:i], log_results=False)
            start = time.clock()
            classifier.train(req_min_fitness=option[2], gen_select=option[3], mutation_prob=option[4], limit_generations=option[5], selection_type=option[6])
            duration = (time.clock() - start)*1000
            print num_run, "\t", duration
Example #23
def main():
	me=Classifier()
	feature_counter=Counter()
	feature_set=pickle.load(open('undersampled_emoticon.pkl', 'rb'))
	feature_list=chain.from_iterable([word_tokenize(process_tweet(tweet)) for tweet, sentiment in feature_set])
	for feat in feature_list:
		feature_counter[feat]+=1
	me.feature_list=[feat for feat, count in feature_counter.most_common(1000)]
	ts=[(me.extract_features(tweet), label) for tweet, label in feature_set]
	print 'training Maxent, algorithm CG'
	me.classifier=MaxentClassifier.train(ts)
	return me
Example #24
def makeClassifier():
    jiraGitMapper = Mapper()

    # Create a mapping of jira commits to git tickets
    ticketsToCommits = jiraGitMapper.mapCommitsToTickets(gitData, jiraData, "SONAR-")
    # Take the git commits and associate them with java class names
    ticketsAndCommitsToClasses = jiraGitMapper.mapCommitsToClasses(ticketsToCommits)
    ticketsToClasses = ticketsAndCommitsToClasses[0]
    commitsToClasses = ticketsAndCommitsToClasses[1]
    classifier = Classifier()
    results = classifier.classifyClasses(ticketsToClasses)
#    results = classifier.randomClassifyClasses(ticketsToClasses)
    print("Precision: %.3f, Recall: %.3f, Accuracy: %.3f, f1 score: %.3f, hamming loss: %.3f" % (results[0], results[1], results[2], results[3], results[4])) 
Example #25
def get_predictions(sample_x):
    c=Classifier()
    predictions=[]
    try:
        for x in sample_x:
            row=x
            labels=get_labels(x)
            p=c.get_prediction(row,labels)
            p=1 if p else 0
            predictions.append(p)
    except Exception as e:
        show_exception(e)
        return []
    return predictions
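Example #26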
    def run(self):
        global worksqueue, spectImg

        classifier = Classifier()

        while True:
            sample = worksqueue.get()

            result = classifier.classify(sample)

            print "\nPrediction: %s\n" % result

            spectImg = writeMFCC(sample, RATE)
            # mark the item done only after it has been processed,
            # so worksqueue.join() waits for the classification to finish
            worksqueue.task_done()
Example #27
def main():
    args = parser.parse_args()
    data_json = read_dataset(args.data)

    processor = TextProcessor()
    classifier = Classifier(processor)
    classifier.train(data_json)

    serialized_classifier = classifier.dump()

    ensure_directory(args.output)
    with open(args.output, 'w') as f:
        f.write(serialized_classifier)
        f.write(os.linesep)
Example #28
def test(args):
    test_performance(args, 5)
    return
    #return
    options = [
        [100, 10, 0.9, 4, 0.05, 10000, selection.ROULETTE_WHEEL_SELECTION],  # discrete_intervals, size_rule_generation, req_min_fitness, gen_select, mutation_prob, limit_generations, selection_type
        [1000, 10, 0.9, 4, 0.05, 10000, selection.ROULETTE_WHEEL_SELECTION],
        [100, 20, 0.9, 4, 0.05, 10000, selection.ROULETTE_WHEEL_SELECTION],
        [100, 5, 0.9, 2, 0.05, 10000, selection.ROULETTE_WHEEL_SELECTION],
        [100, 10, 0.9, 2, 0.05, 10000, selection.ROULETTE_WHEEL_SELECTION],
        [100, 10, 0.9, 6, 0.05, 10000, selection.ROULETTE_WHEEL_SELECTION],
        [200, 50, 0.9, 10, 0.05, 10000, selection.ROULETTE_WHEEL_SELECTION],
        [300, 10, 0.9, 4, 0.1, 10000, selection.ROULETTE_WHEEL_SELECTION],
        [500, 15, 0.9, 2, 0.005, 10000, selection.ROULETTE_WHEEL_SELECTION],
        [50, 20, 0.9, 4, 0.1, 10000, selection.ROULETTE_WHEEL_SELECTION]
    ]

    # test: population size
    options = [
        [100, 5, 0.9, 2, 0.05, 10000, selection.ROULETTE_WHEEL_SELECTION],
        [100, 10, 0.9, 2, 0.05, 10000, selection.ROULETTE_WHEEL_SELECTION],
        [100, 15, 0.9, 2, 0.05, 10000, selection.ROULETTE_WHEEL_SELECTION],
        [100, 20, 0.9, 2, 0.05, 10000, selection.ROULETTE_WHEEL_SELECTION],
        [100, 30, 0.9, 2, 0.05, 10000, selection.ROULETTE_WHEEL_SELECTION],
        [100, 50, 0.9, 2, 0.05, 10000, selection.ROULETTE_WHEEL_SELECTION]
    ]

    # test: selection process
    options = [
        [100, 10, 0.9, 2, 0.05, 10000, selection.ROULETTE_WHEEL_SELECTION],
        [100, 10, 0.9, 2, 0.05, 10000, selection.RANK_SELECTION],
        [100, 10, 0.9, 2, 0.05, 10000, selection.TOURNAMENT_SELECTION]
    ]

    options = [
        [100, 10, 0.9, 2, 0.05, 10000, selection.ROULETTE_WHEEL_SELECTION],
    ]

    average_multiple_runs(30, options, args)

    test_combinations(args)

    for num, option in enumerate(options):
        print "Option num:", num, ", val:", option
        classifier = Classifier(args['data'], discrete_intervals=option[0], size_rule_generation=option[1], filter_list=["skewness", "spectral_rolloff", "energy", "sv", "spread", "centroid", "obsi", "kurtosis"], log_results=True)
        best_results = classifier.train(req_min_fitness=option[2], gen_select=option[3], mutation_prob=option[4], limit_generations=option[5])
        print "Testing"
        classifier.test()
        # classifier.guess_genre([7.53659769442,1389.49121537,0.0166588959174,0.355062895642,1480.75635175,769.172547276,3.47303203307,69.8220939453])
        print "Training ended\nFinal fitness:", best_results
Example #29
class EmojiRecommender():
	def __init__(self, fname_model, fname_embed, fname_dataset):
		print >> sys.stderr, 'EmojiRecommender: [info] loading word index...'
		self.windexer = WordIndexer.load(fname_embed)
	
		print >> sys.stderr, 'EmojiRecommender: [info] loading model...'		
		self.clf = Classifier()
		self.clf.load_model(fname_model)

		print >> sys.stderr, 'EmojiRecommender: [info] loading emojis...'
		ecode_split = cPickle.load(open(fname_dataset, 'r'))
		self.emojis = [emo for emo, split in ecode_split]

		self.ydim = len(self.emojis)

		print >> sys.stderr, 'EmojiRecommender: [info] initialization done'

	def preprocess(self, text):
		text = text.decode('utf8')
		seq = zhtokenizer.tokenize(text)
		idxs = self.windexer.seq2idx(seq)

		return idxs

	def predict_proba(self, text):
		idxs = self.preprocess(text)
		
		if len(idxs) == 0:
			return None
		else:
			return self.clf.predict_proba(idxs)

	def recommend(self, text, n = 5):
		proba = self.predict_proba(text)

		if proba is None:
			eids = [i for i in range(n)]
			scores = [0. for i in range(n)]
		else:
			ranks = [(i, proba[i]) for i in range(self.ydim)]
			ranks = sorted(ranks, key = lambda k:-k[1])

			eids = [ranks[i][0] for i in range(n)]
			scores = [ranks[i][1] for i in range(n)]

		res = [{'emoji':self.emojis[eid], 'score':'%.2f'%(score)} for eid, score in zip(eids, scores)]

		return res
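
A hypothetical driver for the class above; the file names are placeholders, not taken from the source:

recommender = EmojiRecommender('model.bin', 'embedding.txt', 'dataset.pkl')
print recommender.recommend('some input text', n=5)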
Example #30
    def runDev(self):
        print "Running in development mode"

        K=5
        trainImages = util.loadTrainImages()[:100]
        testImages = util.loadTestImages()[:100]
        
        classifier = Classifier()
        
        print 'Training..........'
        classifier.train(trainImages, K)
        trainPredictions = classifier.test(trainImages)
        trainAccuracy = self.evaluate(trainPredictions, trainImages)

        print 'All done. Here is your summary:'
        self.reportAccuracy(trainAccuracy, 'Train Accuracy')
Example #31
    fscore_top100 = Queue.Queue()
    fscore_feat = Queue.Queue()
    fscore_nofeat = Queue.Queue()
else:
    fscore_top100 = np.zeros(num_folds)
    fscore_feat = np.zeros(num_folds)
    fscore_nofeat = np.zeros(num_folds)

for fold in range(num_folds):
    print "Training and testing fold " + str(fold + 1) + "..."
    # Split dataset into train and set based on current fold
    train_set, train_labels, test_set, test_labels = utils.split_set(
        full_set, labels, thresholds[fold], thresholds[fold + 1])

    if args.t:
        t_feat = Thread(target=Classifier(clf(), True, False,
                                          args.t).learn_classifier,
                        args=(train_set, train_labels, test_set, test_labels,
                              fscore_feat))
        t_nofeat = Thread(target=Classifier(clf(), False, False,
                                            args.t).learn_classifier,
                          args=(train_set, train_labels, test_set, test_labels,
                                fscore_nofeat))
        t_100 = Thread(target=Classifier(clf(), True, True,
                                         args.t).learn_classifier,
                       args=(train_set, train_labels, test_set, test_labels,
                             fscore_top100))
        t_feat.start()
        t_nofeat.start()
        t_100.start()
        t_feat.join()
        t_nofeat.join()
Example #32
def test_with_image(img_path):
    VGG_Face = Vgg_face_dag()
    VGG_Face = vgg_face_dag(VGG_Face, "src/models/vgg_face_dag.pth")
    thicc = 2
    score = 0  # To evaluate the state of the driver (drowsy or not)
    frame_count = 0
    frames = []
    path = os.getcwd()
    font = cv2.FONT_HERSHEY_COMPLEX_SMALL

    img = cv2.imread(img_path)

    height, width = img.shape[:2]

    classifier = Classifier(img)

    left_eye_pred = classifier.left_eye()

    right_eye_pred = classifier.right_eye()

    frames.append(img)
    drunk_pred = classifier.drunk_pred(frames, VGG_Face)

    if drunk_pred == 1:
        cv2.putText(img, "Drunk", (10, 20), font, 1, (255, 255, 255), 1,
                    cv2.LINE_AA)
    else:
        cv2.putText(img, "Sober", (10, 20), font, 1, (255, 255, 255), 1,
                    cv2.LINE_AA)

    if left_eye_pred == 0 and right_eye_pred == 0:
        score += 1
        cv2.putText(img, "Asleep", (10, height - 20), font, 1, (255, 255, 255),
                    1, cv2.LINE_AA)

    else:
        score = -1
        cv2.putText(img, "Awake", (10, height - 20), font, 1, (255, 255, 255),
                    1, cv2.LINE_AA)

    if score < 0:
        score = 0

    cv2.putText(img, "Score: " + str(score), (100, height - 20), font, 1,
                (255, 255, 255), 1, cv2.LINE_AA)

    if score > 8:  # Using 8 as the threshold to say the driver has had his/her eyes closed for too long
        # Driver is feeling sleepy so we play the alarm
        cv2.imwrite(os.path.join(path, str(datetime.now()) + '.jpg'), img)
        #playsound() # Play sound

        if thicc < 16:
            thicc += 2
        else:
            thicc -= 2
            if thicc < 2:
                thicc = 2
        # draw the red alert border with the current thickness
        cv2.rectangle(img, (0, 0), (width, height), (0, 0, 255), thicc)

    cv2.imshow('frame', img)
    k = cv2.waitKey(0)
    if k == 27:
        cv2.destroyAllWindows()
Example #33
import os
import time
from flask import Flask, request, redirect, url_for
from classifier import Classifier

UPLOAD_FOLDER = 'uploads/'

app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

cf = Classifier()


@app.route('/', methods=['GET', 'POST'])
def upload_file():
    if request.method == 'POST':
        file = request.files['file']
        filepath = os.path.join(app.config['UPLOAD_FOLDER'], file.filename)
        file.save(filepath)
        print(filepath)
        res = cf.classify(filepath)
        print(res)
        if res is not None:
            return res
        else:
            return 'nope'
    return '''
    <!doctype html>
    <title>Upload</title>
    <h1>Upload image</h1>
    <form method=post enctype=multipart/form-data>
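Example #34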
from classifier import Classifier
import numpy
import pandas as pd

#prepare data
data = numpy.load("data.npz")
train = data['train']
test = data['test']
labels = data['labels']

#create classifier
clf = Classifier()
clf.TreeClassifier()
clf.load_data(training=train, labels=labels, test=test)
results = clf.predict()

df = pd.read_csv("pair&average.csv", sep='\t')


def TF(x):
    if x == 0:
        return False
    else:
        return True


results = [TF(x) for x in results]
TF = pd.Series(results)
df['Need_normalize'] = TF
newdf = df[df['Need_normalize'] == True]
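
TF() above simply coerces the 0/1 predictions to booleans; an equivalent one-liner:

results = [bool(x) for x in results]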
Example #35
def main(displayHistory=True):
    #Window for past frames
    framesDiffHistory = [(getBlankFrameDiff(), getBlankFrameDiff())
                         for i in range(framesInHistory)]
    lastEyes = None

    #Load model classifier
    classifier = Classifier()
    #Start thread to make predictions
    classifier.startPredictions()

    #Initialize webcam
    vs = WebcamVideoStream(src=0).start()

    #For FPS computation
    t0 = -1

    #Face/eyes detector
    detector = Detector()

    print "Starting eye recognition..."
    while True:

        #Compute FPS
        dt = time.time() - t0
        fps = 1 / dt
        t0 = time.time()

        #Limit FPS with wait
        waitMs = 5
        key = cv2.waitKey(waitMs) & 0xFF

        #Get image from webcam, convert to grayscale and resize
        fullFrame = vs.read()
        fullFrame = cv2.cvtColor(fullFrame, cv2.COLOR_BGR2GRAY)
        frame = imutils.resize(fullFrame, width=300)

        #Find face
        faceBB = detector.getFace(frame)
        if faceBB is None:
            #Invalidate eyes bounding box as all will change
            lastEyes = None
            detector.resetEyesBB()
            continue

        #Get low resolution face coordinates
        x, y, w, h = faceBB
        face = frame[y:y + h, x:x + w]

        #Apply to high resolution frame
        xScale = fullFrame.shape[1] / frame.shape[1]
        yScale = fullFrame.shape[0] / frame.shape[0]
        x, y, w, h = x * xScale, y * yScale, w * xScale, h * yScale
        fullFace = fullFrame[y:y + h, x:x + w]

        #Find eyes on high resolution face
        eyes = detector.getEyes(fullFace)
        if eyes is None:
            #Reset last eyes
            lastEyes = None
            continue

        eye0, eye1 = eyes

        #Process (normalize, resize)
        eye0 = process(eye0)
        eye1 = process(eye1)

        #Reshape for dataset
        eye0 = np.reshape(eye0, [datasetImageSize, datasetImageSize, 1])
        eye1 = np.reshape(eye1, [datasetImageSize, datasetImageSize, 1])

        #We have a recent picture of the eyes
        if lastEyes is not None:
            #Load previous eyes
            eye0previous, eye1previous = lastEyes

            #Compute diffs
            diff0 = getDifferenceFrame(eye0, eye0previous)
            diff1 = getDifferenceFrame(eye1, eye1previous)

            #Display/debug
            displayDiff = False
            if displayDiff:
                displayCurrentDiff(eye0,
                                   eye1,
                                   eye0previous,
                                   eye1previous,
                                   stopFrame=False)

            #Crop beginning then add new to end
            framesDiffHistory = framesDiffHistory[1:]
            framesDiffHistory.append([diff0, diff1])

        #Keep current as last frame
        lastEyes = [eye0, eye1]

        #Note: this is not time consuming
        if displayHistory:
            displayHistoryDiffs(framesDiffHistory, fps)

        #Extract each eyes
        X0, X1 = zip(*framesDiffHistory)

        #Reshape as a tensor (NbExamples,SerieLength,Width,Height,Channels)
        X0 = np.reshape(X0, [
            -1,
            len(framesDiffHistory), datasetImageSize, datasetImageSize, 1
        ])
        X1 = np.reshape(X1, [
            -1,
            len(framesDiffHistory), datasetImageSize, datasetImageSize, 1
        ])

        #Save history to Classifier
        classifier.X0 = X0
        classifier.X1 = X1
Example #36
class Spider(object):
    def __init__(self):
        self.verifyCodeUrl = "http://jwgl.buct.edu.cn/CheckCode.aspx"   # CAPTCHA image URL
        self.jwglLoginUrl = "http://jwgl.buct.edu.cn/default2.aspx"     # academic-affairs site login URL
        self.getGradeUrl = "http://jwgl.buct.edu.cn/xscjcx.aspx"        # grade query URL
        self.getScheduleUrl = "http://jwgl.buct.edu.cn/xskbcx.aspx"     # timetable query URL
        self.postClassUrl = "http://jwgl.buct.edu.cn/xsxk.aspx"         # course selection URL
        self.studentID = # student ID
        self.username = # student name
        self.jwglPassword = # academic-affairs site password
        self.major = '0202高分子材料与工程'

        self.session = requests.Session()       # instantiate the session object
        self.response = self.session.send(self.prepareJwglFirst(), timeout=5)   # GET the login page to obtain its '__VIEWSTATE'

        # instantiate the CAPTCHA recognizer
        from classifier import Classifier
        self.classifier = Classifier()
        self.classifier.loadTrainingMat()

        self.remainList = [0, 1, 3, 4, 6, 7, 8, 10, 11, 12, 14]

    def formatHeaders(self, referer=None):
        """
        生成请求的 headers,referer 参数的默认值为 None
        若 referer 为 None,则 headers 不包括 referer 参数
        """
        headers = {
            'Host': 'jwgl.buct.edu.cn',
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'Accept-Encoding': 'gzip, deflate, br',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Request': '1',
            }
        if referer:
            headers['Referer'] = referer
        return headers

    def getVIEWSTATE(self):
        """
        正则获取登录页面的 "__VIEWSTATE"
        """
        import re
        return re.findall('<.*name="__VIEWSTATE".*value="(.*)?".*/>', self.response.text)[0]

    def prepareJwglFirst(self):
        headers = self.formatHeaders()
        req = Request('GET', self.jwglLoginUrl, headers=headers)
        return self.session.prepare_request(req)

    def prepareJwglLogin(self):
        """
        实例化登录 jwgl 需要的 request
        """
        postdata = {
            '__VIEWSTATE': self.getVIEWSTATE(),     # crucial parameter, extracted from the current page source
            'txtUserName': self.studentID,
            'TextBox2': self.jwglPassword,
            'txtSecretCode': self.verCode,
            'RadioButtonList1': '学生',    # "student"
            'Button1': '',
            'lbLanguage': '',
            'hidPdrs': '',
            'hidsc': '',
        }
        headers = self.formatHeaders(self.jwglLoginUrl)
        req = Request('POST', self.jwglLoginUrl, headers=headers, data=postdata)
        return self.session.prepare_request(req)

    def prepareGetGrade(self):
        headers = self.formatHeaders(self.response.url)
        params = {
            'xh': self.studentID,
            'xm': self.username,
            'gnmkdm': 'N121605',
        }
        req = Request('GET', self.getGradeUrl, headers=headers, params=params)
        return self.session.prepare_request(req)

    def preparePastGrade(self):
        headers = self.formatHeaders(self.response.url)
        params = {
            'xh': self.studentID,
            'xm': self.username,
            'gnmkdm': 'N121605',
        }
        postdata = {
            '__EVENTTARGET': '',
            '__EVENTARGUMENT': '',
            '__VIEWSTATE': self.getVIEWSTATE(),         # crucial parameter, extracted from the current page source
            'hidLanguage': '',
            'ddlXN': '',
            'ddlXQ': '',
            'ddl_kcxz': '',
            'btn_zcj': '历年成绩',
        }
        req = Request('POST', self.getGradeUrl, headers=headers, params=params, data=postdata)
        return self.session.prepare_request(req)

    def prepareSchedule(self):
        headers = self.formatHeaders(self.response.url)
        params = {
            'xh': self.studentID,
            'xm': self.username,
            'gnmkdm': 'N121603',
        }
        req = Request('GET', self.getScheduleUrl, headers=headers, params=params)
        return self.session.prepare_request(req)

    def preparePastSchedule(self, xn_, xq_):
        headers = self.formatHeaders(self.response.url)
        params = {
            'xh': self.studentID,
            'xm': self.username,
            'gnmkdm': 'N121603',
        }
        postdata = {
            '__EVENTTARGET': 'xnd',
            '__EVENTARGUMENT': '',
            '__VIEWSTATE': self.getVIEWSTATE(),         # crucial parameter, extracted from the current page source
            'xnd': xn_,
            'xqd': xq_,
        }
        req = Request('POST', self.getScheduleUrl, headers=headers, params=params, data=postdata)
        return self.session.prepare_request(req)

    def prepareClass(self):
        headers = self.formatHeaders(self.response.url)
        params = {
            'xh': self.studentID,
            'xm': self.username,
            'gnmkdm': 'N121101',
        }
        req = Request('GET', self.postClassUrl, headers=headers, params=params)
        return self.session.prepare_request(req)

    def prepareGetClass(self):
        headers = self.formatHeaders(self.response.url)
        params = {
            'xh': self.studentID,
            'xm': self.username,
            'gnmkdm': 'N121101',
        }
        postdata = {
            '__EVENTTARGET': '',
            '__EVENTARGUMENT': '',
            '__VIEWSTATE': self.getVIEWSTATE(),         # crucial parameter, extracted from the current page source
            'DrDl_Nj': self.studentID[:4],
            'zymc': self.major + '主修专业||' + self.studentID[:4],
            'xx': '',
            'Button5': '本专业选课'
        }
        req = Request('POST', self.postClassUrl, headers=headers, params=params, data=postdata)
        return self.session.prepare_request(req)

    def jwglLogin(self, tryNum=10):
        """
        教务网登录函数
        tryNum --> 尝试登录的最大次数,防止因递归深度过大导致溢出
        """
        import re
        tryNum -= 1
        if tryNum < 0:
            print('\n*** stack overflow! exiting...')
            exit(0)

        codeImg = self.session.get(self.verifyCodeUrl, timeout=5)       # fetch the CAPTCHA image
        with open('check.gif', 'wb') as fr:                             # save the CAPTCHA image
            for chunk in codeImg:
                fr.write(chunk)
        self.verCode = self.classifier.recognizer("check.gif")          # recognize the CAPTCHA

        try:
            self.response = self.session.send(self.prepareJwglLogin(), timeout=5)
            if re.search(self.studentID, self.response.url):            # if the student ID appears in response.url, treat login as successful
                print("login successfully!")
                print(self.response.url)
            else:
                raise VerifyError("Wrong Verification code!")
        except VerifyError as e:
            print(e)
            print("retry...")
            self.jwglLogin(tryNum)      # on failure, retry recursively

    def getPastGrade(self):
        """
        获取历年成绩
        """
        self.response = self.session.send(self.prepareGetGrade(), timeout=5)
        self.response = self.session.send(self.preparePastGrade(), timeout=5)
        gradeMat = self.formatTable(self.response.text)
        gradeMat = [[row[i] for i in range(len(row)) if i in self.remainList] for row in gradeMat]
        self.outputTable(gradeMat, outputPath='grade.md')

    def getPastSchedule(self, xn_ ,xq_):
        self.response = self.session.send(self.prepareSchedule(), timeout=5)
        self.response = self.session.send(self.preparePastSchedule(xn_, xq_), timeout=5)
        scheduleMat = self.formatTable(self.response.text)
        with open('schedule.md', 'w') as fr:
            fr.write(str(scheduleMat))
        #self.outputTable(scheduleMat, outputPath='schedule.md')

    def getClassList(self):
        self.response = self.session.send(self.prepareClass(), timeout=5)
        self.response = self.session.send(self.prepareGetClass(), timeout=5)

    def outputTable(self, tableMat, outputPath):
        """
        将成绩输出成 md 格式
        """
        tableMat.insert(1, [':------' for i in range(len(tableMat[0]))])
        with open(outputPath, 'w') as fr:
            for row in tableMat:
                fr.write('|')
                for each in row:
                    fr.write(each)
                    fr.write('|')
                fr.write('\n')

    def formatTable(self, tableBody):
        """
        将抓取到的成绩解析成列表
        """
        from bs4 import BeautifulSoup
        import re
        soup = BeautifulSoup(tableBody, 'html.parser')
        tableRow = soup.br.table.find_all('tr')
        tableMat = [i.find_all('td') for i in tableRow]
        return [[each.get_text().strip() for each in row] for row in tableMat]

    def clean(self):
        """
        爬取结束关闭会话
        """
        self.session.close()
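
A hypothetical driver for the Spider class, assuming the credential fields in __init__ have been filled in (the year/term arguments are placeholders):

spider = Spider()
spider.jwglLogin()
spider.getPastGrade()
spider.getPastSchedule('2016-2017', '1')
spider.clean()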
Example #37
from flask_socketio import SocketIO

import socket
from PIL import Image
import numpy as np
import struct
import sys
import time
import log
import os

from classifier import Classifier
from datastore import DataStore


classifier = Classifier(16, 16, 3)
app = Flask(__name__, static_url_path='')
socketio = SocketIO(app)
ds = DataStore('/var/pood/ds')
last_frame_time = 0
frames_received = 100
positives_last_frame = 0
force_training = False
collecting_negs = False

def has_cli_arg(arg_str):
    return arg_str in sys.argv


def classify_req(sock):
    global last_frame_time, frames_received, positives_last_frame, collecting_negs
Example #38
from itertools import count

import torch
from torchvision import utils
import random
import glob
from shutil import copyfile
from mask_loader import load_image

from classifier import Classifier

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
CLASSIFIER_FILENAME = 'trained_models/classifier.to'

classifier = Classifier()
classifier.cuda()
classifier.load_state_dict(torch.load(CLASSIFIER_FILENAME))
classifier.eval()

file_names = glob.glob('data/raw/**.jpg', recursive=True)

while True:
    file_name = random.choice(file_names)
    hash = file_name.split('/')[-1][:-4]

    image = load_image(file_name).to(device)
    image = classifier.apply(image)

    if image is None:
        continue
Example #39
    # game stuff
    from game_logic import GameLogic
    from levels import LevelMic
    from path_collector import PathCollector

    # yaml config file
    cfg = yaml.safe_load(open("../config.yaml"))

    # init path collector
    path_coll = PathCollector(cfg, root_path='.')

    # --
    # mic

    # create classifier
    classifier = Classifier(path_coll=path_coll, verbose=True)

    # create mic instance
    mic = Mic(classifier=classifier,
              feature_params=cfg['feature_params'],
              mic_params=cfg['mic_params'],
              is_audio_record=True)

    # --
    # game setup

    # init pygame
    pygame.init()

    # init display
    screen = pygame.display.set_mode(cfg['game']['screen_size'])
Example #40
def train():
    if args.dataset == 'baidu_VH':
        dataset = baidu_VH(PROJECT_METAROOT)
    elif args.dataset == 'summe':
        pass
        #dataset=
    else:
        raise ValueError('No such dataset')
    log.l.info(dataset.print_info())
    train_data = AsyncReader(dataset,
                             root_path=BAIDU_VH_ROOT,
                             mode='train',
                             modality=args.modality)
    train_data.set_params({
        'limitedfiles':
        None,
        'sample_rate':
        100,
        'save_path':
        'tmp_results/train_{}_sampled.pkl'.format(args.modality)
    })
    X_train, Y_train = train_data.read_data(k=args.thread)

    val_data = AsyncReader(dataset,
                           root_path=BAIDU_VH_ROOT,
                           mode='val',
                           modality=args.modality)
    val_data.set_params({
        'limitedfiles':
        None,
        'sample_rate':
        1,
        'save_path':
        'tmp_results/val_{}_sampled.pkl'.format(args.modality)
    })
    X_val, Y_val = val_data.read_data(k=args.thread)

    model = Classifier(model_name=args.model_name,
                       if_grid_search=args.if_grid_search,
                       model_kernel=args.model_kernel)
    if args.if_grid_search:
        model.set_grid_search_params(grid_search_params[args.model_name])
        X_train_grid_search, Y_train_grid_search = Sample_data(
            X_train, Y_train, args.grid_search_sample_rate)
        model.grid_search(X_train_grid_search, Y_train_grid_search)
    model.fit(X_train, Y_train)

    X_val_metric, Y_val_metric = Sample_data(X_val, Y_val, 0.1)
    predict_val = model.predict(X_val_metric)
    metrics = get_metrics(predict_val, Y_val_metric, metrics=METRICS)
    # print metrics
    log.l.info('the metrics of {} is :{}'.format(METRICS, metrics))
    del X_train, Y_train  #,X_train_grid_search,Y_train_grid_search,X_val_metric,Y_val_metric
    if args.create_curves:
        # for test set:
        val_curves_dic = dict()
        for k, v in val_data.data_dic.items():
            val_curves_dic[k] = model.predict(v)

        test_data = AsyncReader(dataset,
                                root_path=BAIDU_VH_ROOT,
                                mode='test',
                                modality=args.modality)
        test_data.set_params({
            'limitedfiles':
            None,
            'sample_rate':
            1,
            'save_path':
            'tmp_results/test_{}_sampled.pkl'.format(args.modality)
        })
        _, _ = test_data.read_data(k=args.thread)

        test_curves_dic = dict()
        for k, v in test_data.data_dic.items():
            test_curves_dic[k] = model.predict(v)
        return_info = {'val': val_curves_dic, 'test': test_curves_dic}
        if args.save_curves:
            joblib.dump(
                return_info,
                'tmp_results/val_test_{}_curves.pkl'.format(args.modality))
        return return_info
    return None
Example #41
# Library import
import uvicorn
from fastapi import FastAPI, HTTPException, Request
from fastapi.templating import Jinja2Templates
from fastapi.responses import HTMLResponse
from classifier import Classifier
from helper import SentimentRequest, SentimentResponse

# Create APP intance of FastAPI
app = FastAPI()
model = Classifier()
templates = Jinja2Templates(directory="templates")

# Index route. Default: http://127.0.0.1:8000
@app.get("/", response_class=HTMLResponse)
async def read_item(request: Request):
    context = {
        "request" : request,
        'title' : "Form Input for News Classifier"
    }
    return templates.TemplateResponse("index.html", context=context)

@app.post('/predict/', response_model=SentimentResponse, status_code=200)
async def predict_text(request: SentimentRequest):

    if not model:
        raise HTTPException(status_code=404, detail="Model not found.")

    pred = model.process(request.text)

    return SentimentResponse(text=request.text, prediction=pred)
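
uvicorn is imported above but never used in the snippet; a typical entry point (host and port are illustrative):

if __name__ == "__main__":
    uvicorn.run(app, host="127.0.0.1", port=8000)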
Example #42
from classifier import Classifier
import time
from flask import Flask, render_template, request
app = Flask(__name__)

print("Load classifier")
start_time = time.time()
classifier = Classifier()
print("Classifier is successfully loaded")
print(time.time() - start_time, "seconds")


@app.route("/", methods=["POST", "GET"])
def index_page(text="", prediction_message=""):
    if request.method == "POST":
        text = request.form["text"]

        prediction_message = classifier.get_result_message(text)

    return render_template('simple_page.html',
                           text=text,
                           prediction_message=prediction_message)


if __name__ == "__main__":
    app.run(host='127.0.0.1', port=8080, debug=True)
Example #43
# Chunks should never include caption data from multiple videos, cut off before 500 if at the end of the caption, then start again for the next caption.

# Idea: after question answering, compare the comment's video-ID to the video-ID of the caption chunk which answered the question.
# this will give us an idea of where the answers are coming from (i.e. is it always from the associated video or not)

# Change these to get best results with a static response to positive and negative comments.
positive_threshold = 0.75
negative_threshold = -0.75

####################################
#                                  #
#           Object Calls           #
#                                  #
####################################

classifier = Classifier()  # Question classifier class.
YTObj = CommentCollection(API_SERVICE_NAME, API_VERSION,
                          DEVELOPER_KEY)  # youtube acess object

####################################
#                                  #
#          Main Code Body          #
#                                  #
####################################

# dataframe for comments and captions with format
"""
---------------
|              | 'commentList' | 'captionList' |
| VIDEO_IDS[0] |    list(...)  |    list(...)  |
                      ...
Example #44
    """

    os.environ['PYTHONHASHSEED'] = '0'
    np.random.seed(17)
    rn.seed(12345)


if __name__ == "__main__":
    set_reproductible()
    datadir = "../data/"
    trainfile = datadir + "traindata.csv"
    devfile = datadir + "devdata.csv"
    testfile = None
    # Basic checking
    start_time = time.perf_counter()
    classifier = Classifier()
    print("\n")
    # Training
    print("1. Training the classifier...\n")
    classifier.train(trainfile)
    # Evaluation on the dev dataset
    print("\n2. Evaluation on the dev dataset...\n")
    slabels = classifier.predict(devfile)
    glabels = load_label_output(devfile)
    eval_list(glabels, slabels)
    if testfile is not None:
        # Evaluation on the test data
        print("\n3. Evaluation on the test dataset...\n")
        slabels = classifier.predict(testfile)
        glabels = load_label_output(testfile)
        eval_list(glabels, slabels)
Example #45
from flask import Flask, request
import sys
sys.path.append('./scripts/classifier')
sys.path.append('./scripts/server/sql')
from classifier import Classifier
import utils, instructor, course, program_outcomes, learning_objectives, lab_schedule, \
    assignment_schedule, project_schedule, mid_term_schedule, final_exam_schedule, \
    course_grading

txt_clf = Classifier()
app = Flask(__name__)


# http://localhost:5000/classify?text=who is doing it
@app.route("/classify")
def classify():
    text = request.args.get('text', '')  # default to '' so .lower() below is safe
    text = text.lower()
    text = utils.map_words_to_digits_in_text(text)
    question_type = txt_clf.classify(text)
    return _return_response_for_question(text, question_type)


def _return_response_for_question(text, label):
    if label == 'instructor':
        return instructor.get_instructor_details(text)
    elif label == 'course_name':
        return course.get_course_details(text)
    elif label == 'course_learning_objectives':
        return learning_objectives.get_learning_objectives(text)
    elif label == 'program_outcome':
Example #46
def main(is_interactive=True,
         k=64,
         des_option=constants.ORB_FEAT_OPTION,
         svm_kernel=cv2.SVM_LINEAR):
    if not is_interactive:
        experiment_start = time.time()
    # Check for the dataset of images
    if not os.path.exists(constants.DATASET_PATH):
        print("Dataset not found, please copy one.")
        return
    dataset = Dataset(constants.DATASET_PATH)
    dataset.generate_sets()

    # Check for the directory where stores generated files
    if not os.path.exists(constants.FILES_DIR_NAME):
        os.makedirs(constants.FILES_DIR_NAME)

    if is_interactive:
        des_option = input(
            "Enter [1] for using ORB features or [2] to use SIFT features.\n")
        k = input(
            "Enter the number of cluster centers you want for the codebook.\n")
        svm_option = input(
            "Enter [1] for using SVM kernel Linear or [2] to use RBF.\n")
        svm_kernel = cv2.SVM_LINEAR if svm_option == 1 else cv2.SVM_RBF

    des_name = constants.ORB_FEAT_NAME if des_option == constants.ORB_FEAT_OPTION else constants.SIFT_FEAT_NAME

    log = Log(k, des_name, svm_kernel)

    codebook_filename = filenames.codebook(k, des_name)
    if is_interactive:
        codebook_option = input(
            "Enter [1] for generating a new codebook or [2] to load one.\n")
    else:
        codebook_option = constants.GENERATE_OPTION
    if codebook_option == constants.GENERATE_OPTION:
        # Calculate all the training descriptors to generate the codebook
        start = time.time()
        des = descriptors.all_descriptors(dataset, dataset.get_train_set(),
                                          des_option)
        end = time.time()
        log.train_des_time(end - start)
        # Generates the codebook using K Means
        print("Generating a codebook using K-Means with k={0}".format(k))
        start = time.time()
        codebook = descriptors.gen_codebook(dataset, des, k)
        end = time.time()
        log.codebook_time(end - start)
        # Stores the codebook in a file
        utils.save(codebook_filename, codebook)
        print("Codebook saved in {0}".format(codebook_filename))
    else:
        # Load a codebook from a file
        print("Loading codebook ...")
        codebook = utils.load(codebook_filename)
        print("Codebook with shape = {0} loaded.".format(codebook.shape))

    # Train and test the dataset
    classifier = Classifier(dataset, log)
    svm = classifier.train(svm_kernel,
                           codebook,
                           des_option=des_option,
                           is_interactive=is_interactive)
    print("Training ready. Now beginning with testing")
    result, labels = classifier.test(codebook,
                                     svm,
                                     des_option=des_option,
                                     is_interactive=is_interactive)

    # Store the results from the test
    classes = dataset.get_classes()
    log.classes(classes)
    log.classes_counts(dataset.get_classes_counts())
    result_filename = filenames.result(k, des_name, svm_kernel)
    test_count = len(dataset.get_test_set()[0])
    result_matrix = np.reshape(result, (len(classes), test_count))
    utils.save_csv(result_filename, result_matrix)

    # Create a confusion matrix
    confusion_matrix = np.zeros((len(classes), len(classes)), dtype=np.uint32)
    for i in range(len(result)):
        predicted_id = int(result[i])
        real_id = int(labels[i])
        confusion_matrix[real_id][predicted_id] += 1

    print("Confusion Matrix =\n{0}".format(confusion_matrix))
    log.confusion_matrix(confusion_matrix)
    log.save()
    print("Log saved on {0}.".format(filenames.log(k, des_name, svm_kernel)))
    if not is_interactive:
        experiment_end = time.time()
        elapsed_time = utils.humanize_time(experiment_end - experiment_start)
        print("Total time during the experiment was {0}".format(elapsed_time))
    else:
        # Show a plot of the confusion matrix on interactive mode
        utils.show_conf_mat(confusion_matrix)
        raw_input("Press [Enter] to exit ...")
Example #47
        wordCounter[word] += 1
    else:
        wordCounter[word] = 1
popularWords = sorted(wordCounter, key = wordCounter.get, reverse = True)
lexicon = popularWords[:4000]
# After learning the lexicon, OOV words are replaced by the UNK token
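# unkWord itself is not shown in this excerpt; a plausible (hypothetical) sketch:
# def unkWord(tokens, lexicon):
#     vocab = set(lexicon)
#     return [w if w in vocab else 'UNK' for w in tokens]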
trainingSet0 = unkWord(trainingSet0, lexicon)
trainingSet1 = unkWord(trainingSet1, lexicon)
# Positive and negative tweets are passed to the training object
for word in trainingSet0:
    tweetTrainer.train(word, '0')
for word in trainingSet1:
    tweetTrainer.train(word, '1')
    
# a classifier instance
sentimentClassifier = Classifier(tweetTrainer.data, tokenizer.Tokenizer(stop_words = [], signs_to_remove = []))
# storage of lexicon and sentiment classifier on disk
c = open('sentimentClassifier.pickle', 'wb')
l = open('lexicon.pickle', 'wb')
pickle.dump(sentimentClassifier, c)
pickle.dump(lexicon, l)
c.close()
l.close()
file.close()

# test section
# loading lexicon and sentimentClassifier for evaluation over random samples
# samples are chosen randomly and not within training samples
c = open('sentimentClassifier.pickle', 'rb')
l = open('lexicon.pickle', 'rb')
fileEval = open('test2.csv', 'r', encoding="Latin-1")  # .csv
Example #48
    "Test10(3Good3Bad).mp4"
    # "Detector/DLTest.mp4"
    # "Train1(1Good5Bad).mp4",
    # "Train2(1Good6Bad).mp4"
    # "Train3(1Good5Bad).mp4",
    # "Train4(1Good5Bad).mp4",
]

for video in videoList:
    videoPath = "/home/eamonn/FYP/Videos/" + video

    gymObjects = {
        'Gym_Plate': {
            'Location': '',
            'Frame': 0
        },
        'FootWear': {
            'Location': [],
            'Frame': 0
        }
        # 'Person': {'Location': [],
        #                   'Frame': 0}
    }

    classifier = Classifier()
    classifier.createDecisionTreeClassifier()
    god = GymObjectDetector(gymObjects, videoPath)
    trackedObjects = god.getNormalisedObjectLocations()
    CSRTTracker = MultiTracker(gymObjects, videoPath, classifier)
    barbellPosition, footwearPosition = CSRTTracker.displayAndTrack()
Example #49
class FitAndPredict:
    
    '''
    Class contains function for the training and classification pipeline
    '''
    
    def __init__(self):
        self.train_file = config.TRAIN_FILE
        self.test_file = config.TEST_FILE
        self.predicted_test_file = config.PREDICTED_TEST_FILE
        self.model_folder = config.MODELS_FOLDER
        self.target_map = config.TARGET_MAP
        self.map_sensors = config.MAP_SENSORS
        self.load_cell_threshold = config.LOAD_CELL_THRESHOLD
        self.weight_threshold = config.WEIGHT_THRESHOLD
        self.outliers_threshold = config.OUTLIERS_THRESHOLD
        self.feature_names = config.FEATURE_NAMES
        self.dev_map = config.DEV_MAP
        self.plank_dict = config.PLANK_DICT
        self.position_to_remove = config.POSITION_TO_REMOVE
        self.sensor_details_file = sensor_details_file
        self.norm_sensor_details_file = norm_sensor_details_file
        self.random_state = config.RANDOM_STATE
        self.min_samples_split = config.MIN_SAMPLES_SPLIT
        self.min_samples_leaf = config.MIN_SAMPLES_LEAF
        self.n_estimators = config.N_ESTIMATORS
        self.model_name = model_name
        self.target_column = config.TARGET_COLUMN
        self.preprocess = PreProcess(self.load_cell_threshold, self.weight_threshold, self.outliers_threshold, \
                                     self.map_sensors, self.target_map, self.position_to_remove, \
                                     self.sensor_details_file, self.norm_sensor_details_file, self.model_folder)
        self.fe = FeatureExtractor(self.plank_dict)
        
        
    def read_train_data(self):
        
        '''
        Function to read the train data in the training pipeline
        '''
        
        logging.debug(__name__ + ' : ' + ' Start read_train_data()')
        try:
            self.input_data = read_csv(self.train_file)
            logging.debug(__name__ + ' shape : ' + str(self.input_data.shape))
            logging.debug(__name__ + ' : ' + ' End read_train_data()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Input file not found ')
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass 
        
    def check_train_data(self):
        
        '''
        Function to check if all load cell columns and target column are present in the train data
        '''

        logging.debug(__name__ + ' : ' + ' Start check_train_data()')        
        try:
            train_columns = self.input_data.columns.values
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass

        # check if all load cell columns are present in the train data

        try:
            if (set(self.map_sensors.keys()).issubset(set(train_columns))) or \
            (set(self.map_sensors.values()).issubset(set(train_columns))):
                pass
            else:
                print ("LOAD CELL COLUMNS NOT PRESENT IN TRAIN DATA")
                logging.debug(__name__ + ' : ' + ' LOAD CELL COLUMNS NOT PRESENT IN TRAIN DATA')
                logging.debug(__name__ + ' : ' + ' End read_train_data()')
                sys.exit()
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
            
        # check if target column is present in the data
        
        try:
            if (set([self.target_column]).issubset(set(train_columns))):
                pass
            else:
                print ("TARGET COLUMN NOT PRESENT IN TRAIN DATA")
                logging.debug(__name__ + ' : ' + ' TARGET COLUMN NOT PRESENT IN TRAIN DATA')
                logging.debug(__name__ + ' : ' + ' End read_train_data()')
                sys.exit()
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass

        # check if all 5 positions are present in target column of train data

        try:
            if (set(self.input_data[self.target_column].unique()).issubset(set([1, 2, 3, 4, 5]))):
                pass
            else:
                print ("VALUES OTHER THAN PRESPECIFIED POSITION VALUES PRESENT IN TRAIN DATA")
                logging.debug(__name__ + ' : ' + ' VALUES OTHER THAN PRESPECIFIED POSITION VALUES PRESENT IN TRAIN DATA')
                logging.debug(__name__ + ' : ' + ' End read_train_data()')
                sys.exit()
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass

        logging.debug(__name__ + ' : ' + ' End check_train_data()')
        
        return 
        
    def preprocess_train_data(self):
        
        '''
        Function to preprocess the train data in the training pipeline
        '''
        
        logging.debug(__name__ + ' : ' + ' Start preprocess_train_data()')
        
        try:
            logging.debug(__name__ + ' : ' + ' Start rename_columns_if_needed()')
            self.preprocessed_input_data = self.preprocess.rename_columns_if_needed(self.input_data)
            logging.debug(__name__ + ' : ' + ' End rename_columns_if_needed()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            logging.debug(__name__ + ' : ' + ' Start rem_missing_train()')
            self.preprocessed_input_data = self.preprocess.rem_missing_train(self.preprocessed_input_data)
            logging.debug(__name__ + ' : ' + ' End rem_missing_train()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            logging.debug(__name__ + ' : ' + ' Start rem_load_cell_threshold()')
            self.preprocessed_input_data = self.preprocess.rem_load_cell_threshold(self.preprocessed_input_data)
            logging.debug(__name__ + ' : ' + ' End rem_load_cell_threshold()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            logging.debug(__name__ + ' : ' + ' Start rem_less_weights()')
            self.preprocessed_input_data = self.preprocess.rem_less_weights(self.preprocessed_input_data)
            logging.debug(__name__ + ' : ' + ' End rem_less_weights()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            logging.debug(__name__ + ' : ' + ' Start rem_sitting()')
            self.preprocessed_input_data = self.preprocess.rem_sitting(self.preprocessed_input_data)
            logging.debug(__name__ + ' : ' + ' End rem_sitting()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            logging.debug(__name__ + ' : ' + ' Start normalize()')
            self.preprocessed_input_data = self.preprocess.normalize(self.preprocessed_input_data)
            logging.debug(__name__ + ' : ' + ' End normalize()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            logging.debug(__name__ + ' : ' + ' Start treat_outliers_train()')
            self.preprocessed_input_data = self.preprocess.treat_outliers_train(self.preprocessed_input_data)
            logging.debug(__name__ + ' : ' + ' End treat_outliers_train()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass

        logging.debug(__name__ + ' shape : ' + str(self.preprocessed_input_data.shape))
        logging.debug(__name__ + ' : ' + ' End preprocess_train_data()')

        return 
        
    def read_test_data(self):
        
        '''
        Function to read the test data in the classification pipeline
        '''
        
        logging.debug(__name__ + ' : ' + ' Start read_test_data()')
        try:
            self.test_data = read_csv(self.test_file, header = None)
            logging.debug(__name__ + ' shape : ' + str(self.test_data.shape))
            logging.debug(__name__ + ' : ' + ' End read_test_data()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Test file not found ')
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            return
        
    def check_test_data(self):
        
        '''
        Function to check if all load cell columns are present in the test data
        '''
        logging.debug(__name__ + ' : ' + ' Start check_test_data()')

        # check if all load cell columns are present in the test data
        #try:
            #test_columns = self.test_data.columns
        #except Exception as e:
            #logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        #finally:
            #pass
        try:
            #if (set(self.map_sensors.keys()).issubset(set(test_columns))) or (set(self.map_sensors.values()).issubset(set(test_columns))):
            test_columns = ['LC' + str(x) for x in range(1, 17)]
            if self.test_data.shape[1] == len(test_columns):
                self.test_data.columns = test_columns
            else:
                print ("TEST DATA DO NOT HAVE ALL LOAD CELLS DATA")
                logging.debug(__name__ + ' : ' + ' TEST DATA DO NOT HAVE ALL LOAD CELLS DATA')
                logging.debug(__name__ + ' : ' + ' End check_test_data()')
                sys.exit()
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        logging.debug(__name__ + ' : ' + ' End check_test_data()')

        return 
        
        
    def preprocess_test_data(self):
        
        '''
        Function to preprocess the test data in the classification pipeline
        '''
        
        logging.debug(__name__ + ' : ' + ' Start preprocess_test_data()')

        try:
            self.preprocessed_test_data = self.test_data
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        #try:
        #    logging.debug(__name__ + ' : ' + ' Start rename_columns_if_needed()')
        #    self.preprocessed_test_data = self.preprocess.rename_columns_if_needed(self.test_data)
        #    logging.debug(__name__ + ' : ' + ' End rename_columns_if_needed()')
        #except Exception as e:
        #    logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        #finally:
        #    pass
        
        try:
            logging.debug(__name__ + ' : ' + ' Start treat_missing_test()')
            self.preprocessed_test_data = self.preprocess.treat_missing_test(self.preprocessed_test_data)
            logging.debug(__name__ + ' : ' + ' End treat_missing_test()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            logging.debug(__name__ + ' : ' + ' Start normalize()')
            self.preprocessed_test_data = self.preprocess.normalize(self.preprocessed_test_data)
            logging.debug(__name__ + ' : ' + ' End normalize()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass

        try:
            logging.debug(__name__ + ' : ' + ' Start treat_outliers_test()')
            self.preprocessed_test_data = self.preprocess.treat_outliers_test(self.preprocessed_test_data)
            logging.debug(__name__ + ' : ' + ' End treat_outliers_test()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        logging.debug(__name__ + ' shape : ' + str(self.preprocessed_test_data.shape))
        logging.debug(__name__ + ' : ' + ' End preprocess_test_data()')
        
        return
        
    def transform_train_data_into_features(self):
        
        '''
        Function to create features from train data in the training pipeline
        '''
        
        logging.debug(__name__ + ' : ' + ' Start transform_train_data_into_features()')

        try:
            # left_sensors_pct
            logging.debug(__name__ + ' : ' + ' Start left_percent()')
            self.preprocessed_input_data['left_sensors_pct'] = self.preprocessed_input_data.apply(lambda x : self.fe.left_percent(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End left_percent()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            #  plank_1_std
            logging.debug(__name__ + ' : ' + ' Start plank_1_std_cal()')
            self.preprocessed_input_data['plank_1_std'] = self.preprocessed_input_data.apply(lambda x : self.fe.plank_1_std_cal(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End plank_1_std_cal()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # plank_2_std
            logging.debug(__name__ + ' : ' + ' Start plank_2_std_cal()')
            self.preprocessed_input_data['plank_2_std'] = self.preprocessed_input_data.apply(lambda x : self.fe.plank_2_std_cal(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End plank_2_std_cal()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # plank_3_std
            logging.debug(__name__ + ' : ' + ' Start plank_3_std_cal()')
            self.preprocessed_input_data['plank_3_std'] = self.preprocessed_input_data.apply(lambda x : self.fe.plank_3_std_cal(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End plank_3_std_cal()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # plank_4_std
            logging.debug(__name__ + ' : ' + ' Start plank_4_std_cal()')
            self.preprocessed_input_data['plank_4_std'] = self.preprocessed_input_data.apply(lambda x : self.fe.plank_4_std_cal(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End plank_4_std_cal()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # com_1_x
            logging.debug(__name__ + ' : ' + ' Start get_com_1_x()')
            self.preprocessed_input_data['plank_1_com_x'] = self.preprocessed_input_data.apply(lambda x: self.fe.get_com_1_x(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End get_com_1_x()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # com_2_x
            logging.debug(__name__ + ' : ' + ' Start get_com_2_x()')
            self.preprocessed_input_data['plank_2_com_x'] = self.preprocessed_input_data.apply(lambda x: self.fe.get_com_2_x(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End get_com_2_x()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:        
            # com_3_x
            logging.debug(__name__ + ' : ' + ' Start get_com_3_x()')
            self.preprocessed_input_data['plank_3_com_x'] = self.preprocessed_input_data.apply(lambda x: self.fe.get_com_3_x(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End get_com_3_x()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # com_4_x
            logging.debug(__name__ + ' : ' + ' Start get_com_4_x()')
            self.preprocessed_input_data['plank_4_com_x'] = self.preprocessed_input_data.apply(lambda x: self.fe.get_com_4_x(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End get_com_4_x()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # com_1_y
            logging.debug(__name__ + ' : ' + ' Start get_com_1_y()')
            self.preprocessed_input_data['plank_1_com_y'] = self.preprocessed_input_data.apply(lambda x: self.fe.get_com_1_y(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End get_com_1_y()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # com_2_y
            logging.debug(__name__ + ' : ' + ' Start get_com_2_y()')
            self.preprocessed_input_data['plank_2_com_y'] = self.preprocessed_input_data.apply(lambda x: self.fe.get_com_2_y(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End get_com_2_y()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # com_3_y
            logging.debug(__name__ + ' : ' + ' Start get_com_3_y()')
            self.preprocessed_input_data['plank_3_com_y'] = self.preprocessed_input_data.apply(lambda x: self.fe.get_com_3_y(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End get_com_3_y()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # com_4_y
            logging.debug(__name__ + ' : ' + ' Start get_com_4_y()')
            self.preprocessed_input_data['plank_4_com_y'] = self.preprocessed_input_data.apply(lambda x: self.fe.get_com_4_y(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End get_com_4_y()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # y_errors
            logging.debug(__name__ + ' : ' + ' Start get_errors_from_fitted_line()')
            self.preprocessed_input_data['y_errors'] = self.preprocessed_input_data.apply(lambda x: self.fe.get_errors_from_fitted_line(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End get_errors_from_fitted_line()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # plank_3_deviation from fitted line through COMs of first two planks
            logging.debug(__name__ + ' : ' + ' Start get_deviation_plank_3()')
            self.preprocessed_input_data['plank_3_dev'] = self.preprocessed_input_data.apply(lambda x: self.fe.get_deviation_plank_3(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End get_deviation_plank_3()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # plank_4_deviation from fitted line through COMs of first two planks
            logging.debug(__name__ + ' : ' + ' Start get_deviation_plank_4()')
            self.preprocessed_input_data['plank_4_dev'] = self.preprocessed_input_data.apply(lambda x: self.fe.get_deviation_plank_4(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End get_deviation_plank_4()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # plank_3_dev_bucket
            logging.debug(__name__ + ' : ' + ' Start bucketize_plank_3_dev()')
            self.preprocessed_input_data['plank_3_dev_bucket'] = self.preprocessed_input_data['plank_3_dev'].apply(lambda x: self.fe.bucketize_plank_dev(x))
            self.preprocessed_input_data['plank_3_dev_bucket'] = self.preprocessed_input_data['plank_3_dev_bucket'].apply(lambda x: self.dev_map[x])
            logging.debug(__name__ + ' : ' + ' End bucketize_plank_3_dev()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # plank_4_dev_bucket
            logging.debug(__name__ + ' : ' + ' Start bucketize_plank_4_dev()')
            self.preprocessed_input_data['plank_4_dev_bucket'] = self.preprocessed_input_data['plank_4_dev'].apply(lambda x: self.fe.bucketize_plank_dev(x))
            self.preprocessed_input_data['plank_4_dev_bucket'] = self.preprocessed_input_data['plank_4_dev_bucket'].apply(lambda x: self.dev_map[x])
            logging.debug(__name__ + ' : ' + ' End bucketize_plank_4_dev()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass

        try:
            # plank_4_deviation from fitted line through COMs of the 2nd and 3rd planks
            logging.debug(__name__ + ' : ' + ' Start plank_4_wrt_3_2()')
            self.preprocessed_input_data['plank_4_wrt_3_2'] = self.preprocessed_input_data.apply(lambda x: self.fe.plank_4_wrt_3_2(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End plank_4_wrt_3_2()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass


        logging.debug(__name__ + ' shape : ' + str(self.preprocessed_input_data.shape))
        logging.debug(__name__ + ' : ' + ' End transform_train_data_into_features()')
        
        return
        
    def transform_test_data_into_features(self):
        
        '''
        Function to create features from test data in the classification pipeline
        '''
        
        logging.debug(__name__ + ' : ' + ' Start transform_test_data_into_features()')

        try:
            # left_sensors_pct
            logging.debug(__name__ + ' : ' + ' Start left_percent()')
            self.preprocessed_test_data['left_sensors_pct'] = self.preprocessed_test_data.apply(lambda x : self.fe.left_percent(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End left_percent()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # plank_1_std
            logging.debug(__name__ + ' : ' + ' Start plank_1_std_cal()')
            self.preprocessed_test_data['plank_1_std'] = self.preprocessed_test_data.apply(lambda x : self.fe.plank_1_std_cal(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End plank_1_std_cal()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # plank_2_std
            logging.debug(__name__ + ' : ' + ' Start plank_2_std_cal()')
            self.preprocessed_test_data['plank_2_std'] = self.preprocessed_test_data.apply(lambda x : self.fe.plank_2_std_cal(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End plank_2_std_cal()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # plank_3_std
            logging.debug(__name__ + ' : ' + ' Start plank_3_std_cal()')
            self.preprocessed_test_data['plank_3_std'] = self.preprocessed_test_data.apply(lambda x : self.fe.plank_3_std_cal(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End plank_3_std_cal()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # plank_4_std
            logging.debug(__name__ + ' : ' + ' Start plank_4_std_cal()')
            self.preprocessed_test_data['plank_4_std'] = self.preprocessed_test_data.apply(lambda x : self.fe.plank_4_std_cal(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End plank_4_std_cal()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # com_1_x
            logging.debug(__name__ + ' : ' + ' Start get_com_1_x()')
            self.preprocessed_test_data['plank_1_com_x'] = self.preprocessed_test_data.apply(lambda x: self.fe.get_com_1_x(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End get_com_1_x()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # com_2_x
            logging.debug(__name__ + ' : ' + ' Start get_com_2_x()')
            self.preprocessed_test_data['plank_2_com_x'] = self.preprocessed_test_data.apply(lambda x: self.fe.get_com_2_x(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End get_com_2_x()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # com_3_x
            logging.debug(__name__ + ' : ' + ' Start get_com_3_x()')
            self.preprocessed_test_data['plank_3_com_x'] = self.preprocessed_test_data.apply(lambda x: self.fe.get_com_3_x(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End get_com_3_x()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # com_4_x
            logging.debug(__name__ + ' : ' + ' Start get_com_4_x()')
            self.preprocessed_test_data['plank_4_com_x'] = self.preprocessed_test_data.apply(lambda x: self.fe.get_com_4_x(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End get_com_4_x()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # com_1_y
            logging.debug(__name__ + ' : ' + ' Start get_com_1_y()')
            self.preprocessed_test_data['plank_1_com_y'] = self.preprocessed_test_data.apply(lambda x: self.fe.get_com_1_y(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End get_com_1_y()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # com_2_y
            logging.debug(__name__ + ' : ' + ' Start get_com_2_y()')
            self.preprocessed_test_data['plank_2_com_y'] = self.preprocessed_test_data.apply(lambda x: self.fe.get_com_2_y(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End get_com_2_y()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # com_3_y
            logging.debug(__name__ + ' : ' + ' Start get_com_3_y()')
            self.preprocessed_test_data['plank_3_com_y'] = self.preprocessed_test_data.apply(lambda x: self.fe.get_com_3_y(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End get_com_3_y()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # com_4_y
            logging.debug(__name__ + ' : ' + ' Start get_com_4_y()')
            self.preprocessed_test_data['plank_4_com_y'] = self.preprocessed_test_data.apply(lambda x: self.fe.get_com_4_y(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End get_com_4_y()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # y_errors
            logging.debug(__name__ + ' : ' + ' Start get_errors_from_fitted_line()')
            self.preprocessed_test_data['y_errors'] = self.preprocessed_test_data.apply(lambda x: self.fe.get_errors_from_fitted_line(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End get_errors_from_fitted_line()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # plank_3_deviation from fitted line through COMs of first two planks
            logging.debug(__name__ + ' : ' + ' Start get_deviation_plank_3()')
            self.preprocessed_test_data['plank_3_dev'] = self.preprocessed_test_data.apply(lambda x: self.fe.get_deviation_plank_3(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End get_deviation_plank_3()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # plank_4_deviation from fitted line through COMs of first two planks
            logging.debug(__name__ + ' : ' + ' Start get_deviation_plank_4()')
            self.preprocessed_test_data['plank_4_dev'] = self.preprocessed_test_data.apply(lambda x: self.fe.get_deviation_plank_4(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End get_deviation_plank_4()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # plank_3_dev_bucket
            logging.debug(__name__ + ' : ' + ' Start bucketize_plank_3_dev()')
            self.preprocessed_test_data['plank_3_dev_bucket'] = self.preprocessed_test_data['plank_3_dev'].apply(lambda x: self.fe.bucketize_plank_dev(x))
            self.preprocessed_test_data['plank_3_dev_bucket'] = self.preprocessed_test_data['plank_3_dev_bucket'].apply(lambda x: self.dev_map[x])
            logging.debug(__name__ + ' : ' + ' End bucketize_plank_3_dev()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass
        
        try:
            # plank_4_dev_bucket
            logging.debug(__name__ + ' : ' + ' Start bucketize_plank_4_dev()')
            self.preprocessed_test_data['plank_4_dev_bucket'] = self.preprocessed_test_data['plank_4_dev'].apply(lambda x: self.fe.bucketize_plank_dev(x))
            self.preprocessed_test_data['plank_4_dev_bucket'] = self.preprocessed_test_data['plank_4_dev_bucket'].apply(lambda x: self.dev_map[x])
            logging.debug(__name__ + ' : ' + ' End bucketize_plank_4_dev()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass

        try:
            # plank_4_deviation from fitted line through COMs of the 2nd and 3rd planks
            logging.debug(__name__ + ' : ' + ' Start plank_4_wrt_3_2()')
            self.preprocessed_test_data['plank_4_wrt_3_2'] = self.preprocessed_test_data.apply(lambda x: self.fe.plank_4_wrt_3_2(x), axis = 1)
            logging.debug(__name__ + ' : ' + ' End plank_4_wrt_3_2()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            pass


        logging.debug(__name__ + ' shape : ' + str(self.preprocessed_test_data.shape))
        logging.debug(__name__ + ' : ' + ' End transform_test_data_into_features()')
        
        return
    
    def train_model(self):
        
        '''
        Function to train the model on train data for training pipeline
        '''
        
        logging.debug(__name__ + ' : ' + ' Start train_model()')
        
        try:
            # train the model
            self.learner = Learner(self.n_estimators, self.min_samples_split, \
                               self.min_samples_leaf, self.random_state, \
                               self.model_folder, self.model_name)
            X_train = self.preprocessed_input_data[self.feature_names]
            Y_train = self.preprocessed_input_data[self.target_column]
            predictions, pred_prob  = self.learner.train_model(X_train, Y_train)
            #print (np.round(accuracy_score(Y_train, predictions), 4) * 100)
            #print (np.round(log_loss(Y_train, pred_prob), 2))
            logging.debug(__name__ + ' : ' + ' End train_model()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            return
        
    def classify(self):
        
        '''
        Function to make classifications on test data for the classification pipeline
        '''
        
        logging.debug(__name__ + ' : ' + ' Start classify()')
        
        try:
            
            # classify using the model
            self.classifier = Classifier(self.model_folder)
            X_test = self.preprocessed_test_data[self.feature_names]
            #Y_test = self.preprocessed_test_data[self.target_column]
            #print (X_test.shape, Y_test.shape)
            predictions, pred_prob = self.classifier.classify_model(X_test)
            
            # saving the test dataset with the predicted values
            
            #self.test_data[self.target_column] = predictions
            #self.test_data.to_csv(self.predicted_test_file, index = False)
            self.predicted_test_data = DataFrame({self.target_column:predictions})
            self.predicted_test_data.to_csv(self.predicted_test_file, index = False, header = False)
            
            # printing the accuracy score
            #print (np.round(accuracy_score(Y_test, predictions), 4) * 100)
            #print (np.round(log_loss(Y_test, pred_prob), 2))
            
            logging.debug(__name__ + ' : ' + ' End classify()')
        except Exception as e:
            logging.error(__name__ + ' : ' + ' Error: ' + str(e))
        finally:
            return
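
A minimal usage sketch of the two pipelines above (method order inferred from the class; configuration comes from the config module and the module-level names referenced in __init__):

fp = FitAndPredict()
# training pipeline
fp.read_train_data()
fp.check_train_data()
fp.preprocess_train_data()
fp.transform_train_data_into_features()
fp.train_model()
# classification pipeline
fp.read_test_data()
fp.check_test_data()
fp.preprocess_test_data()
fp.transform_test_data_into_features()
fp.classify()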
Example #50
class Recognizer:
    def __init__(self, agent_id):
        self.clf = Classifier(
            model_path='./models/model.augmented.pkl',
            vectorizer_path='./models/vectorizer.augmented.pkl',
            label_dict_path='./models/label_dict.augmented.pkl')
        self.q_detector = QuestionDetector(agent_id)
        self.tokenizer = MeCab.Tagger()

    def recognize(self, sentence):
        sentences = self.split_sentence(sentence)
        analyzed_sentences = [self.normalize(s) for s in sentences]
        sentences = [''.join([w.word for w in s]) for s in analyzed_sentences]
        intents = self.clf.predict(sentences)
        is_q = any(self.q_detector.detect(s) for s in sentences)
        results = [
            self.get_detail(s, i) for s, i in zip(analyzed_sentences, intents)
        ]
        return results

    def get_detail(self, sentence, intent):
        if intent == 'DIVINE':
            agent_id = self.get_agent_id(''.join([w.word for w in sentence]))
            result = self.get_white_black(sentence)
            result = 'HUMAN' if result is None else result
            return ('DIVINE', agent_id, result)

        elif intent == 'VOTE':
            agent_id = self.get_agent_id(''.join([w.word for w in sentence]))
            return ('VOTE', agent_id)

        elif intent == 'ESTIMATE':
            agent_id = self.get_agent_id(''.join([w.word for w in sentence]))
            role = self.get_white_black(sentence)
            role = self.get_role(sentence) if role is None else role
            return ('ESTIMATE', agent_id, role)

        elif intent == 'CO':
            role = self.get_role(sentence)
            return ('CO', role)

        elif intent == 'REQUEST':
            return ('REQUEST')

        else:
            return ('CHAT')

    def get_agent_id(self, sentence):
        m = re.search(r'Agent\[(\d+)\]', sentence)
        if m:
            return m.group(1)
        else:
            return False

    def get_role(self, sentence):
        if any(w.word == '人狼' for w in sentence):
            return '狼'
        elif any(w.word == '狂人' for w in sentence):
            return '狂'
        elif any(w.word == '占い師' for w in sentence):
            return '占'
        else:
            return '村'

    def get_white_black(self, sentence):
        if any(w.word == '人狼' for w in sentence):
            return 'WEREWOLF'
        elif any(w.word == '村人' for w in sentence):
            return 'HUMAN'
        else:
            return None

    def normalize(self, sentence):
        sentence = self.norm_token(sentence)
        words = self.tokenize(sentence)
        words = [self.norm_role(w) for w in words]
        return words

    def tokenize(self, sentence):
        result = []
        for line in self.tokenizer.parse(sentence).strip().split('\n'):
            if line == 'EOS': break
            word, feature = line.split('\t')
            result.append(Morph(word, feature))
        return result

    def norm_role(self, s):
        if s.pos == '名詞':
            if re.match(r'人狼|狼|黒', s.word):
                s.word = '人狼'
            elif re.match(r'狂人|狂', s.word):
                s.word = '狂人'
            elif re.match(r'占い師|占い|占', s.word):
                s.word = '占い師'
            elif re.match(r'村人|人間|白', s.word):
                s.word = '村人'
        return s

    def norm_token(self, sentence):
        sentence = re.sub(r'[\..。]', '。', sentence)
        sentence = re.sub(r'[\,,、]', '、', sentence)
        sentence = re.sub(r'\?|?', '?', sentence)
        sentence = re.sub(r'\!|!', '!', sentence)
        return sentence

    def split_sentence(self, sentence):
        sentence = self.norm_token(sentence)
        sentence = re.sub(r'?', '?[SEP]', sentence)
        sentence = re.sub(r'!', '![SEP]', sentence)
        sentence = re.sub(r'。', '。[SEP]', sentence)
        sentence = re.sub(r'\[SEP\]$', '', sentence)
        sentences = sentence.split('[SEP]')
        return sentences
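
A small usage sketch of the sentence splitting above (hypothetical input; constructing Recognizer assumes the pickled model files exist at the listed paths):

r = Recognizer(agent_id=1)
r.split_sentence('占い結果です。Agent[03]は人狼でした!')
# -> ['占い結果です。', 'Agent[03]は人狼でした!']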
Example #51
    def iter_oger_nn_test_data(self):
        """Iter annotations of OGER filtered by the NN on the CRAFT corpus.

        yields:
            tuple: (pmid, sspan, espan, n_gram, label, entity ID)
        """
        # perform concept recognition or not
        cr = int(self.config['other']['concept_recognition'])

        # load the classifier
        c = Classifier(self.config_path)
        c.restore_model()

        # create a feature extractor
        fextr = FeatureExtractor(self.config_path)

        # back mapping from integer to ontology
        mapping = {int(self.config['classes'][o]):
                   o for o in self.config['classes']}

        # initialize feature names with empty arrays
        features = {}
        for name in c.column_names[:-1]:
            features[name] = []

        # lists of term data
        tlists = []

        # go through all OGER test annotations
        for pmid, sspan, espan, n_gram, label, entity_id in \
                self.iter_oger_test_data():
            # get list of feature values
            for i, val in enumerate(fextr.iter_feature_values(n_gram)):
                name = c.column_names[i]
                features[name].append(val)

            tlists.append((pmid, sspan, espan, n_gram, label, entity_id))

        predictions = c.classifier.predict(
                            input_fn=lambda: c.eval_input_fn(features))

        # get predictions and zip them with other annotation data
        for i, pred_dict in enumerate(predictions):
            # list of (probability, entity type label)-tuples
            probs = []
            # go through the probabilities of the entity types
            for index, p in enumerate(pred_dict['probabilities']):
                # append (probability, entity type label)
                probs.append((p, mapping[index]))

            # sort tuples by probability in decreasing order
            probs = sorted(probs, reverse=True)

            # labels to consider, by default only the one with highest prob
            labels = [probs[0][1]]

            # check if the difference between the highest and second-highest
            # probability is smaller than the configured threshold (e.g. 0.3)
            threshold = float(self.config['parameters']['threshold'])
            prob_diff = probs[0][0] - probs[1][0]
            if prob_diff < threshold:
                labels.append(probs[1][1])
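            # e.g. probs = [(0.55, 'cell'), (0.40, 'protein'), ...] with a
            # threshold of 0.3: the difference 0.15 is below it, so both
            # labels are kept (the labels here are purely illustrative)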

            # go through entity type labels
            for label in labels:
                # ignore entity types classified as normal nouns
                if label != 'nn':
                    # check if concept recognition should be performed
                    if cr:
                        # ignore entity types where OGER and NN give different
                        # labels
                        if label == tlists[i][4]:
                            yield tlists[i]
                    else:
                        yield tlists[i][:4] + (label,) + (tlists[i][-1],)
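Example #52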
class WGAN(tf.keras.Model):
    def __init__(self, batch_size=64):
        super().__init__(name="WGAN")
        self.generator = Generator(100, batch_size)
        self.critic = Critic()
        self.classifier_m = Classifier()

        self.train_dataset = None
        self.test_dataset = None
        self.train_labels = None
        self.test_labels = None
        self.batch_size = batch_size

    def load_dataset(self, dataset, n_classes):
        self.train_dataset, self.train_labels, self.test_dataset, self.test_labels = dataset
        self.num_classes = n_classes

    @tf.function
    def predict_batch(self, images, type_class):
        images_predictions = tf.TensorArray(tf.float32,
                                            size=10,
                                            dynamic_size=True)
        ys = tf.TensorArray(tf.float32, size=10, dynamic_size=True)
        matched_images = tf.TensorArray(tf.float32, size=0, dynamic_size=True)
        index = 0
        for i in tf.range(len(images)):
            gen_image = data_access.normalize(
                data_access.de_standardize(images[i]))
            img = tf.expand_dims(gen_image, axis=0)
            c_type = self.classifier_m.predict_image(img)
            w_list = tf.one_hot(c_type, self.num_classes)
            w_list = tf.reshape(w_list, (w_list.shape[1], ))

            images_predictions = images_predictions.write(i, w_list)
            y_list = tf.one_hot(type_class, self.num_classes)
            ys = ys.write(i, y_list)
            if (tf.reduce_all(tf.equal(w_list, y_list))):
                matched_images = matched_images.write(index, images[i])
                index += 1

        return images_predictions.stack(), ys.stack(), matched_images.stack()

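    # The penalty below follows the WGAN-GP formulation (Gulrajani et al., 2017):
    # sample points on straight lines between real and generated images and
    # penalize the critic's gradient norm for deviating from 1 there.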
    @tf.function
    def gradient_penalty(self, generated_samples, real_images, half_batch):
        alpha = backend.random_uniform(shape=[half_batch, 1, 1, 1],
                                       minval=0.0,
                                       maxval=1.0)
        differences = generated_samples - real_images
        interpolates = real_images + (alpha * differences)
        gradients = tf.gradients(self.critic(interpolates), [interpolates])[0]
        slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), axis=[1, 2, 3]))
        gradient_p = tf.reduce_mean((slopes - 1.)**2)
        return gradient_p

    @tf.function
    def training_step_critic(self, real_imgs, gen_imgs, real_labels,
                             gen_labels, half_batch):
        lambda_ = 10.0
        with tf.GradientTape() as tape:
            d_x_real = self.critic(real_imgs, training=True)
            d_x_gen = self.critic(gen_imgs, training=True)
            critic_r_loss = self.critic.compute_loss(real_labels, d_x_real)
            critic_g_loss = self.critic.compute_loss(gen_labels, d_x_gen)
            total_loss = critic_r_loss + critic_g_loss + (
                lambda_ *
                self.gradient_penalty(gen_imgs, real_imgs, half_batch))

        gradients_of_critic = tape.gradient(total_loss,
                                            self.critic.trainable_variables)
        self.critic.backPropagate(gradients_of_critic,
                                  self.critic.trainable_variables)
        return total_loss

    @tf.function
    def training_step_generator(self, noise_size, class_type):
        # prepare points in latent space as input for the generator
        X_g = self.generator.generate_noise(self.batch_size, noise_size)
        # create inverted labels for the fake samples
        y_g = -np.ones((self.batch_size, 1)).astype(np.float32)
        with tf.GradientTape() as tape:
            d_x = self.generator(X_g, training=True)  # Trainable?
            d_z = self.critic(d_x, training=True)
            images_predictions, ys, matched_images = self.predict_batch(
                d_x, class_type)
            generator_loss = self.generator.compute_loss(
                d_z, y_g, ys, images_predictions)

        gradients_of_generator = tape.gradient(
            generator_loss, self.generator.trainable_variables)
        self.generator.backPropagate(gradients_of_generator,
                                     self.generator.trainable_variables)
        return generator_loss, matched_images, self.generator(
            self.generator.seed, training=False)

    def generate_real_samples(self, n_samples):
        # choose random instances
        ix = np.random.randint(0, self.train_dataset.shape[0], n_samples)
        # select images
        X = self.train_dataset[ix]
        # associate with class labels of -1 for 'real'
        y = -np.ones((n_samples, 1)).astype(np.float32)
        return X, y

    @tf.function
    # use the generator to generate n fake examples, with class labels
    def generate_fake_samples(self, noise_size, n_samples):
        # generate points in latent space
        x_input = self.generator.generate_noise(n_samples, noise_size)
        # get images generated
        X = self.generator(x_input, training=True)
        # associate with class labels of 1.0 for 'fake'
        y = np.ones((n_samples, 1)).astype(np.float32)
        return X, y

    def define_loss_tensorboard(self):
        logdir = "logs/train/" + datetime.now().strftime("%Y%m%d-%H%M%S")
        return tf.summary.create_file_writer(logdir=logdir)

    def define_graph_tensorboard(self):
        logdir = "logs/graph/" + datetime.now().strftime("%Y%m%d-%H%M%S")
        return tf.summary.create_file_writer(logdir=logdir)

    def train_model(self, epoches, n_critic=5, noise_size=100, class_type=5):

        batch_per_epoch = int(self.train_dataset.shape[0] / self.batch_size)

        # calculate the number of training iterations
        n_steps = batch_per_epoch * epoches
        # calculate the size of half a batch of samples
        half_batch = int(self.batch_size / 2)

        sum_writer_loss = self.define_loss_tensorboard()
        self.classifier_m.load_local_model()
        avg_loss_critic = tf.keras.metrics.Mean()
        avg_loss_gen = tf.keras.metrics.Mean()
        epoch = 0
        n_dif_images = 4
        directory = 'imgs'
        start_time = time.time()
        for i in range(n_steps):
            for _ in range(n_critic):
                # get randomly selected 'real' samples
                X_real, y_real = self.generate_real_samples(half_batch)
                # generate 'fake' examples
                X_fake, y_fake = self.generate_fake_samples(
                    noise_size, half_batch)

                # update critic model weights
                c_loss = self.training_step_critic(X_real, X_fake, y_real,
                                                   y_fake, half_batch)
                avg_loss_critic(c_loss)

            gen_loss, matched_images, gen_images = self.training_step_generator(
                noise_size, class_type)
            avg_loss_gen(gen_loss)
            data_access.print_training_output(i, n_steps,
                                              avg_loss_critic.result(),
                                              avg_loss_gen.result())

            if i % batch_per_epoch == 0:
                data_access.store_images_seed(directory,
                                              gen_images[:n_dif_images], epoch)
                with sum_writer_loss.as_default():
                    tf.summary.scalar('loss_gen',
                                      avg_loss_gen.result(),
                                      step=self.generator.optimizer.iterations)
                    tf.summary.scalar('avg_loss_critic',
                                      avg_loss_critic.result(),
                                      step=self.critic.optimizer.iterations)
                epoch += 1
        data_access.create_collection(epoches, n_dif_images, directory)
        print('Time elapse {}'.format(time.time() - start_time))

    def generate_images(self, number_of_samples, directory):
        seed = tf.random.normal([number_of_samples, 100])
        images = self.generator(seed)
        predictions = self.classifier_m.predict_image_vector(
            data_access.normalize(data_access.de_standardize(images)))
        data_access.produce_generate_figure('imgs', images, predictions)
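
A minimal usage sketch (hypothetical array names; load_dataset expects the 4-tuple unpacked above):

wgan = WGAN(batch_size=64)
wgan.load_dataset((x_train, y_train, x_test, y_test), n_classes=10)
wgan.train_model(epoches=10, n_critic=5, noise_size=100, class_type=5)
wgan.generate_images(16, 'imgs')

Example #53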
    def __init__(self,
                 microDataLoc,
                 clusterNum=1,
                 macroDataLoc="data/clusterData.txt"):
        Classifier.__init__(self, microDataLoc, clusterNum, macroDataLoc)
Example #54
    def start_rules_training(self):

        # Generate the tags for the value ranges
        gen_tags = gt(self.df)
        tags_ranges = gen_tags.set_tags()

        # Split the dataset in 5 random partitions
        parts_gen = Part(self.df)
        partition_set = parts_gen.gen_partition_set()

        # Initialize the best rules set using the full dataset
        best_rulesset = self.get_initial_rules(self.df, tags_ranges)

        # Train the rules with all possible combinations of training and test partitions
        for i in range(len(partition_set)):

            # Select the partition for the test set, using the index of the loop
            test_set = partition_set[i]

            # Select the partitions for training set, removing test partition from a copy of the partitions list
            training_set = partition_set.copy()
            training_set.pop(i)  # Remove test partition from training_set

            # Fuzzify the data from the test set
            fuzzifier = FuzGen(test_set)
            test_df = fuzzifier.fuzzify_data(tags_ranges)
            '''
            Evaluate the rules set against each training partition to find the best rules.
            In each iteration, the matched rules are concatenated onto the previous rules set.

            This makes it possible to single out the best rules: the ones matched most often.
            '''
            for training_df in training_set:
                # Fuzzify training partition
                fuzzifier = FuzGen(training_df)
                fuzzy_df = fuzzifier.fuzzify_data(tags_ranges)

                # Apply the current rules set to the training partition
                classifier = Classifier(fuzzy_df, best_rulesset)
                classifier.classify_dataset()

                # Check classification results: true-positive count and matched rules
                TP_value, matched_rules = classifier.verify_classification()

                # Concatenate the matched rules to the current best rules set
                best_rulesset = pd.concat([best_rulesset, matched_rules])
            '''
            Once the matched rules over the initial set are gathered, test the rules set on the test partition.
            Before that, apply a filter that keeps only one rule per antecedent set, based on the matches
            obtained during training.
            '''

            # Filter the best rules, removing rules with repeated antecedents
            best_rulesset = self.reduce_rules(best_rulesset, tags_ranges)

            # Classify the test set with the rules set obtained from training
            classifier = Classifier(test_df, best_rulesset)
            classifier.classify_dataset()

            # Check classification results
            TP_value, matched_rules = classifier.verify_classification()

            # Calculate accuracy as the ratio of true positives (matches) to the size of the test set
            accuracy = TP_value / len(test_df)

            print(f"Test {i} accuracy: {accuracy}")

        print(f"Length of minimal rules set: {len(best_rulesset)}")

        return best_rulesset
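The outer loop implements a leave-one-partition-out scheme: each partition serves as the test set exactly once while the rest train the rules. A stripped-down, standalone sketch of the same scheme (the toy partitions below are illustrative only, not data from the source):

# Each partition plays the test role once; the others form the training set.
partitions = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]]  # stand-ins for the 5 random splits
for i, test_set in enumerate(partitions):
    training_set = partitions[:i] + partitions[i + 1:]  # every remaining partition
    print(f"fold {i}: train on {training_set}, test on {test_set}")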
Example #55
        time_budget = max_time
    overall_time_budget = overall_time_budget + time_budget
    vprint(verbose, "[+] Cumulated time budget (all tasks so far)  %5.2f sec" % overall_time_budget)
    # We do not add the time left over from the previous dataset: time_budget += time_left_over
    vprint(verbose, "[+] Time budget for this task %5.2f sec" % time_budget)
    time_spent = time.time() - start
    vprint(verbose, "[+] Remaining time after reading data %5.2f sec" % (time_budget - time_spent))
    if time_spent >= time_budget:
        vprint(verbose, "[-] Sorry, time budget exceeded, skipping this task")
        execution_success = False
        continue

    # ========= Creating a model, knowing its assigned task from D.info['task'].
    # The model can also select its hyper-parameters based on other elements of info.
    vprint(verbose, "======== Creating model ==========")
    M = Classifier()

    # ========= Reload trained model if it exists.
    vprint(verbose, "**********************************************************************")
    vprint(verbose, "****** Attempting to reload model (from res/) to avoid training ******")
    vprint(verbose, "**********************************************************************")
    you_must_train = 1
    modelname = os.path.join(res_dir, basename)
    if os.path.isfile(modelname + '_model.pickle'):
        M = M.load(modelname)
        you_must_train = 0
        vprint(verbose, "[+] Model reloaded, no need to train!")

    # ========= Train if needed only.
    if you_must_train:
        vprint(verbose, "======== Trained model not found, proceeding to train!")
Example #56
                     ha='center',
                     color=color)
    plt.xlabel('Predicted Value')
    plt.xticks(range(CLASSES))
    plt.ylabel('Actual Value')
    plt.yticks(range(CLASSES))
    plt.colorbar()
    plt.tight_layout()
    plt.savefig(str(plot_name), bbox_inches='tight', pad_inches=0)
    plt.close()


# Seed for consistency
np.random.seed(SEED)
# Load best weights back up and make confusion matrices
classifier = Classifier(input_size=INPUTS)
weight_files = sorted(CLASS_MODEL_DIR.iterdir())
for weight_file in weight_files[:-1]:
    classifier.addLayer(file_name=weight_file, output=False)
classifier.addLayer(file_name=weight_files[-1], output=True)
train_conf_title = 'Train Confusion Matrix'
makeConfMat(classifier,
            train_data,
            train_labels,
            CLASS_TRAIN_CONF,
            title=train_conf_title)
test_conf_title = 'Test Confusion Matrix'
makeConfMat(classifier,
            test_data,
            test_labels,
            CLASS_TEST_CONF,
            title=test_conf_title)
Example #57
def run_iteration(iteration, hash_map):
    lbp = LocalBinaryPatterns(24, 8)
    data = []
    labels = []

    #Finding all images
    images = [os.path.join(root, name) for root, dirs, files in os.walk("../training_images")
            for name in files if name.endswith((".jpeg", ".jpg"))]

    #Splitting it into training and testing groups
    training, testing = train_test_split(images, test_size = 0.25)

    #Training Phase
    for imagePath in training:
        #Load the image, convert it to grayscale, and compute LBP
        image = cv2.imread(imagePath)
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        if imagePath in hash_map:
            hist = hash_map[imagePath]
        else:
            hist = lbp.compute(gray)
            hash_map[imagePath] = hist

        print str(iteration) + " DEBUG(Training): Computed LBP Histogram for " + imagePath

        #Plotting histogram if needed
        #plt.bar(bin_edges[:-1], hist, width = 1)
        #plt.xlim(min(bin_edges), max(bin_edges))
        #plt.show()

        #Extract the label from the image path, then update the label and data lists
        labels.append(imagePath.split("/")[-2])
        data.append(hist)

    #Train classifier
    classifier = Classifier("SVM")
    print "\n\n" + str(iteration) + " DEBUG: Training Classifier"
    classifier.train(data, labels)
    print "\n\n" + str(iteration) + " DEBUG: Trained Classifier\n\n"

    #Testing Phase
    data = []
    labels = []
    for imagePath in testing:
        #Load the image, convert to grayscale, describe it and classify it
        image = cv2.imread(imagePath)
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        if imagePath in hash_map:
            hist = hash_map[imagePath]
        else:
            hist = lbp.compute(gray)
            hash_map[imagePath] = hist

        print str(iteration) + " DEBUG(Testing): Computed LBP Histogram for " + imagePath

        data.append(hist)
        labels.append(imagePath.split("/")[-2])

    print "\n\n" + str(iteration) + " DEBUG: Forming predictions"
    predictions = classifier.predict(data)
    counter = 0
    print "\n\n" + str(iteration) + " DEBUG: Printing predictions\n\n"
    for index, prediction in enumerate(predictions):
        print "Name -> " + testing[index] + " Actual -> " + labels[index] + " Prediction -> " + prediction
        if labels[index] == prediction:
            counter = counter + 1

    accuracy = (float(counter)/float(len(predictions))) * 100.0
    print "\n\n" + str(iteration) + " The Classifier Accuracy was " + str(accuracy) + "%"

    return accuracy
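As a usage note: `run_iteration` returns the fold accuracy and fills the shared `hash_map` cache, so repeated runs avoid recomputing LBP histograms for images seen before. A minimal driver sketch, not part of the original (the iteration count of 10 is arbitrary):

if __name__ == "__main__":
    hash_map = {}  # shared across iterations so each histogram is computed once
    accuracies = [run_iteration(i, hash_map) for i in range(10)]
    print "Average accuracy over " + str(len(accuracies)) + " runs: " + str(sum(accuracies) / len(accuracies)) + "%"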
Example #58
    #Load the image, convert it to grayscale, and compute LBP
    image = cv2.imread(imagePath)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    hist = lbp.compute(gray)

    #Plotting histogram if needed
    #plt.bar(bin_edges[:-1], hist, width = 1)
    #plt.xlim(min(bin_edges), max(bin_edges))
    #plt.show()

    #Extract the label from the image path, then update the label and data lists
    labels.append(imagePath.split("/")[-2])
    data.append(hist)

#Train classifier
classifier = Classifier("SVM")
classifier.train(data, labels)

#Testing Phase
data = []
testing = [
    os.path.join(root, name)
    for root, dirs, files in os.walk("../testing_images") for name in files
    if name.endswith((".jpeg", ".jpg"))
]

for imagePath in testing:
    #Load the image, convert to grayscale, describe it and classify it
    image = cv2.imread(imagePath)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    hist = lbp.compute(gray)
Example #59
                            "nubank", "ciw", "cef-savings", "itau-cc",
                            "itau-savings", "bradesco-savings", "generic"
                        ],
                        required=True,
                        help="Set account that will be used.")
    parser.add_argument("-af",
                        "--account_src_file",
                        required=True,
                        help="Set account source to integrate")
    parser.add_argument("-acf", "--account_from", help="Define from import")
    parser.add_argument("-act", "--account_to", help="Define to import")
    parser.add_argument(
        "-cl",
        "--classifier",
        default=None,
        choices=Classifier.AVAILABLE_STRATEGIES().keys(),
        help="Define classifier that must be used to import data")

    args = parser.parse_args()
    if args.verbose:
        loglevel = logging.DEBUG
        logformat = Util.LOG_FORMAT_DEBUG
    elif args.quiet:
        loglevel = logging.WARN
        # TODO log to file in this case
        logformat = Util.LOG_FORMAT_FULL
    else:
        loglevel = logging.INFO
        logformat = Util.LOG_FORMAT_SIMPLE

    # TODO configure the logger via a dictionary - https://realpython.com/python-logging/
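The fragment ends before the computed level and format are applied. A plausible next step, sketched with the standard library's `logging.basicConfig`; this wiring is an assumption, the original may configure logging differently:

    # Hypothetical continuation: wire the selected level/format into the root logger
    logging.basicConfig(level=loglevel, format=logformat)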
Example #60
import yaml

import common
from classifier import Classifier
from detector import Detector
from pipeline import *
from steps import *

with open(common.CONFIG_PATH) as f:
    config = yaml.safe_load(f)

main_pipeline = Pipeline([
    Input("input"),
    DetectingSingleFrameStep(
        "detector", Detector(snapshot_path=config['paths']['detector']),
        EXTRACTORS[config["extractor"]]()),
    ClassifyingBoxesStep(
        "classifier",
        model=Classifier(snapshot_path=config['paths']['classifier']),
        input_width=common.CLASSIFIER_INPUT_WIDTH,
        input_height=common.CLASSIFIER_INPUT_HEIGHT),
    DecodeClassesStep("decoder",
                      label_encoder=common.unpickle_data(
                          config['paths']['label_encoder'])),
])

visualisation_pipeline = main_pipeline + [
    VisualiseStep("visualise"),
    ShowVisualisation("showtime")
]

without_showing = main_pipeline + [VisualiseStep("visualise")]
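For context, a minimal usage sketch. How a `Pipeline` is executed is not shown in this fragment, so the `run` method and the frame input below are assumptions:

# Hypothetical invocation; `run` and its argument are assumptions.
# frame = cv2.imread('sample.jpg')
# detections = visualisation_pipeline.run(frame)

The composition style is the notable design choice here: adding a list of steps to `main_pipeline` yields a new pipeline, so the detection/classification core is defined once and reused by both visualisation variants.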