def __init__(self, bzrc):
    self.bzrc = bzrc
    self.constants = self.bzrc.get_constants()
    self.commands = []
    self.ALPHA = 0.01
    self.BETA = 0.3
    self.OBS_TOLERANCE = 35.0
    self.S = 50
    self.wroteonce = False
    self.goalradius = 30
    self.tankradius = 5
    self.avoidradius = 50
    self.avoidBETA = 0.05
    self.aimtolerance = math.pi / 20
    self.world_grid = WorldGrid()
    self.bayes = Bayes()
    self.turnprob = 0.05
    self.turndecisionprob = 0.5
    self.turned = False
    self.turniter = 0
    self.TURN_MAX = 50
    self.OCCUPIED = 1
    self.UNOCCUPIED = 0
def __init__(self):
    self.lh = Lighthouse(.25, .75)
    x = np.linspace(0, 1, 99)
    y = np.linspace(0, 1, 99)
    pdf = {(xi, yi): 1 for xi in x for yi in y}
    # Cauchy likelihood of observation x given lighthouse distance d and position m.
    # Tuple parameter unpacking in lambdas is Python 2 only, so unpack explicitly.
    likelihood = lambda x, dm: dm[0] / (dm[0]**2 + (x - dm[1])**2)
    self.bayes = Bayes(pdf, likelihood)
def main():
    # Process CLI arguments.
    try:
        execname, host, port = sys.argv
    except ValueError:
        execname = sys.argv[0]
        print('%s: incorrect number of arguments' % execname, file=sys.stderr)
        print('usage: %s hostname port' % sys.argv[0], file=sys.stderr)
        sys.exit(-1)

    # Connect.
    #bzrc = BZRC(host, int(port), debug=True)
    bzrc = BZRC(host, int(port))

    bayes = Bayes()
    constants = bzrc.get_constants()
    bayes.self_not_obs_given_not_occ(float(constants['truenegative']))
    bayes.set_obs_given_occ(float(constants['truepositive']))

    agent = Agent(bzrc, bayes)

    prev_time = time.time()

    # Run the agent
    try:
        while True:
            time_diff = time.time() - prev_time
            agent.tick(time_diff)
    except KeyboardInterrupt:
        print("Exiting due to keyboard interrupt.")
        bzrc.close()
def prep_model(can_load_model, load_path):
    bayes = None
    if can_load_model:
        with open(load_path, "rb") as f:
            bayes = pickle.load(f)
    else:
        bayes = Bayes()
    return bayes
def __init__(self):
    if os.path.getsize("bd_path.txt") > 0:
        # Read the path as text (not bytes) and drop the trailing newline.
        with open("bd_path.txt", 'r') as f:
            self._bd_path = f.readline().strip()
    self._genre_features = np.load('genres_features.npy')
    self._knn = KNN(self._bd_path, self._genre_features)
    self._bayes = Bayes(self._bd_path, self._genre_features)
    self._genres = np.load('genres.npy')
def change_db(self, path):
    self._bd_path = path
    with open("bd_path.txt", 'w') as f:
        f.write(path)
    # self._genres, self._genre_features = extract_funtions.extrakt_features_for_genres(self._bd_path)
    # np.save('genres.npy', self._genres)
    # np.save("genres_features.npy", self._genre_features)
    self._knn = KNN(self._bd_path, self._genre_features)
    self._bayes = Bayes(self._bd_path, self._genre_features)
    #self._knn.train_model()
    self._bayes.train_model()
def modelOutput(trainFile, testFile, modelType):
    """
    output is:
        (naive bayes) variable name | 'class'
        (tan)         variable name | name of its parents
        # empty
    followed by:
        predicted class | actual class | posterior probability (12 digits after the decimal point)
        # empty
    followed by:
        the number of test-set examples that were correctly classified.
    """
    attributes, labels, instances = data_provider(trainFile)
    if modelType == 'n':
        model = Bayes(attributes, labels, instances)
    elif modelType == 't':
        model = TAN(attributes, labels, instances)
    else:
        import sys
        print('model type should be [n] or [t] !!!', file=sys.stderr)
        sys.exit()

    attributes, labels, instances = data_provider(testFile)

    # format output part 1: attribute name | 'class'
    model.printTree()
    print()

    correctClassCnt = 0
    for test in instances:
        result = model.classify(test)
        if result[0] == result[1]:
            correctClassCnt += 1
        # format output part 2: predicted class | actual class | posterior probability
        print(formatOutput(result))
    print()

    # format output part 3: number of correctly classified test instances
    print(correctClassCnt)
'''Load the data'''
data = pda.read_csv("./iris.csv")

'''Standardize'''
data_standard = preprocessing.scale(data.iloc[:, :-1])

'''Split the dataset to guard against overfitting'''
# DataFrame.as_matrix() was removed from pandas; use .values instead.
train_data, test_data, train_labels, test_labels = train_test_split(
    data_standard, data.values[:, -1],
    test_size=0.2, random_state=int(time.time()))

'''
Naive Bayes classification
'''
print("---------------------------Bayes----------------------------------")
# time.clock() was removed in Python 3.8; use time.perf_counter() instead.
start = time.perf_counter()
by = Bayes()
by.train(list(train_data), list(train_labels))
test_data_size = test_data.shape[0]
error_count = 0
for index, td in enumerate(list(test_data)):
    this_label = by.test(td)
    print("Predicted class: {0}, actual class: {1}".format(this_label, test_labels[index]))
    if this_label != test_labels[index]:
        error_count += 1
end = time.perf_counter()
error_rate = (error_count / test_data_size) * 100
time_consuming = end - start
print("Error rate: {0:.2f}%".format(error_rate))
print("Time taken: {0:.4f}s".format(time_consuming))

'''
k-nearest neighbors classification
def __init__(self):
    self.classifier = Bayes()
    self.seg = Seg()
    self.seg.load('seg.pickle')
model_now += 1
print("Training SVM 1...")
svm = Svm(svm_label, svm_images, svm_test_label, svm_test_images)
results[model_now] = svm.train(numToClassfy, numToTrain, "-q -m 1000")

model_now += 1
print("Training SVM 2...")
svm = Svm(svm_label, svm_images, svm_test_label, svm_test_images)
results[model_now] = svm.train(numToClassfy, numToTrain, "-q -m 1000 -t 3")

model_now += 1
knn = knn(10, images, label)
results[model_now] = knn.start(test_images, test_label, numToClassfy)

model_now += 1
bayes = Bayes(10, 784, images, label)
results[model_now] = bayes.start(test_images, test_label, numToClassfy)
model_now += 1

results = results.T
count = 0
# Create the progress bar once, then take a majority vote over the model predictions per sample.
progress = Progress(numToClassfy, "Voting")
for i in range(numToClassfy):
    k = np.argmax(np.bincount(results[i]))
    if k == test_label[i]:
        count += 1
    progress.updata(i + 1)
print("Ensemble classification finished, samples: " + str(numToClassfy) +
      " correct: " + str(count) +
      " accuracy: %.2f %%" % ((count / numToClassfy) * 100))
config = json.load(f)

# update the configuration for the new version of the data
config["csv"] = "test/weather_v2.csv"
config["inputs"] = data.drop(columns=["city", "date", "avg_temp"]).columns.tolist()  # test features
# config["inputs"] = None
# config["resolution"] = None
config["input_history"] = False

# In[2]: Model the data

# produce a bayesian ridge regression rolling forecast
print("---- Bayesian Ridge Regression ----")
model5 = Bayes(**config)
model5.roll(verbose=True)
print(f"Bayesian Average Error: {np.round(model5._error.mean()[0] * 100, 2)}%")

# produce a partial least squares rolling forecast
print("---- PLS ----")
model4 = PLS(**config)
model4.roll(verbose=True)
print(f"PLS Average Error: {np.round(model4._error.mean()[0] * 100, 2)}%")

# produce a neural network rolling forecast
print("---- Neural Network ----")
model3 = MLP(**config)
model3.roll(verbose=True)
print(f"NNet Average Error: {np.round(model3._error.mean()[0] * 100, 2)}%")

# produce a random forest rolling forecast
def main():
    get_data('data/boy82.txt', 'boy')
    get_data('data/boy83.txt', 'boy')
    get_data('data/boynew.txt', 'boy')
    get_data('data/girl35.txt', 'girl')
    get_data('data/girl42.txt', 'girl')
    get_data('data/girlnew.txt', 'girl')
    test3 = []
    testhw = []
    tesths = []
    testws = []
    boys = open('data/boy.txt')
    girls = open('data/girl.txt')
    print("1) Bayes")
    print("2) Fisher")
    print("3) kNN")
    choice = input("Input the algorithm: ")
    if choice == '1':
        for line in boys.readlines():
            height, weight, shoe_size = line.split()
            test3.append([[float(height)], [float(weight)], [float(shoe_size)], 1])
            testhw.append([[float(height)], [float(weight)], 1])
            tesths.append([[float(height)], [float(shoe_size)], 1])
            testws.append([[float(weight)], [float(shoe_size)], 1])
        for line in girls.readlines():
            height, weight, shoe_size = line.split()
            test3.append([[float(height)], [float(weight)], [float(shoe_size)], 0])
            testhw.append([[float(height)], [float(weight)], 0])
            tesths.append([[float(height)], [float(shoe_size)], 0])
            testws.append([[float(weight)], [float(shoe_size)], 0])
        plt.xlim(0, 1.0)
        plt.ylim(0, 1.0)
        plt.plot([0, 1.0], [0, 1.0], color='red')
        b3 = Bayes(trains3)
        b3.paint(test3, 'b')
        bhw = Bayes(trainshw)
        bhw.paint(testhw, 'g')
        bhs = Bayes(trainshs)
        bhs.paint(tesths, 'r')
        bws = Bayes(trainsws)
        bws.paint(testws, 'y')
    elif choice == '2':
        for line in boys.readlines():
            height, weight, shoe_size = line.split()
            test3.append([[float(height)], [float(weight)], [float(shoe_size)], 1])
            testhw.append([[float(height)], [float(weight)], 1])
            tesths.append([[float(height)], [float(shoe_size)], 1])
            testws.append([[float(weight)], [float(shoe_size)], 1])
        for line in girls.readlines():
            height, weight, shoe_size = line.split()
            test3.append([[float(height)], [float(weight)], [float(shoe_size)], 0])
            testhw.append([[float(height)], [float(weight)], 0])
            tesths.append([[float(height)], [float(shoe_size)], 0])
            testws.append([[float(weight)], [float(shoe_size)], 0])
        f3 = Fisher(trains3)
        fhw = Fisher(trainshw)
        fhs = Fisher(trainshs)
        fws = Fisher(trainsws)
        print("1) ROC")
        print("2) Line")
        choice = input("Choose: ")
        if choice == '1':
            f3.paint(test3, 'b')
            fhw.paint(testhw, 'g')
            fhs.paint(tesths, 'r')
            fws.paint(testws, 'y')
        elif choice == '2':
            print("1) Height And Weight")
            print("2) Height And Shoe Size")
            print("3) Weight And Shoe Size")
            choice = input("Choose: ")
            if choice == '1':
                fhw.paint_line(testhw, 'c')
            elif choice == '2':
                fhs.paint_line(tesths, 'r')
            elif choice == '3':
                fws.paint_line(testws, 'y')
    elif choice == '3':
        for line in boys.readlines():
            height, weight, shoe_size = line.split()
            test3.append([float(height), float(weight), float(shoe_size), 1])
            testhw.append([float(height), float(weight), 1])
            tesths.append([float(height), float(shoe_size), 1])
            testws.append([float(weight), float(shoe_size), 1])
        for line in girls.readlines():
            height, weight, shoe_size = line.split()
            test3.append([float(height), float(weight), float(shoe_size), 0])
            testhw.append([float(height), float(weight), 0])
            tesths.append([float(height), float(shoe_size), 0])
            testws.append([float(weight), float(shoe_size), 0])
        choice = input("Input K:")
        if choice == '1':
            k1 = kNN(trainshs, 1)
            print(k1.test(tesths))
            k1.paint()
        elif choice == '3':
            k3 = kNN(trainshs, 3)
            print(k3.test(tesths))
            k3.paint()
        elif choice == '5':
            k5 = kNN(trainshs, 5)
            print(k5.test(tesths))
            k5.paint()
    plt.show()
def train_model(ngrams_file, output_file):
    model = Bayes()
    model.train(Ngrams._load_data(ngrams_file))
    model.serialize(output_file)
        return 0


uniform_prior = {4: 0.2, 6: 0.2, 8: 0.2, 12: 0.2, 20: 0.2}
unbalanced_prior = {4: 0.08, 6: 0.12, 8: 0.16, 12: 0.24, 20: 0.4}
d = [
    8, 2, 1, 2, 5, 8, 2, 4, 3, 7, 6, 5, 1, 6, 2, 5, 8, 8, 5, 3, 4, 2, 4, 3, 8,
    8, 7, 8, 8, 8, 5, 5, 1, 3, 8, 7, 8, 5, 2, 5, 1, 4, 1, 2, 1, 3, 1, 3, 1, 5
]
set1 = [1, 1, 1, 3, 1, 2]
set2 = [10, 10, 10, 10, 8, 8]

print('What are the posteriors if we started with the uniform prior?')
bayes_uniform = Bayes(uniform_prior.copy(), likelihood_func=likelihood_func)
bayes_uniform.update(8)
bayes_uniform.print_distribution()

print('What are the posteriors if we started with the unbalanced prior?')
bayes_unbalanced = Bayes(unbalanced_prior.copy(), likelihood_func=likelihood_func)
bayes_unbalanced.update(8)
bayes_unbalanced.print_distribution()

print('How different were these two posteriors (the uniform from the unbalanced)?')
for k, v in bayes_unbalanced.posterior.items():
    print("{} : {}".format(k, bayes_uniform.posterior[k] - bayes_unbalanced.posterior[k]))
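The die-roll and coin-flip snippets here and below all rely on a small discrete `Bayes` class that takes a prior dictionary and a likelihood function, and exposes `update`, `print_distribution`, and a `posterior` attribute. That class is not included in this section; the following is a minimal sketch of what it might look like, assuming the likelihood is called as `likelihood_func(data, hypothesis)` (as the die example suggests) and omitting the `plot` helper used in the coin-flip exercise. It is an illustration of the update rule, not the original implementation.

class Bayes:
    """Minimal sketch of the discrete Bayes updater assumed above (not the original class)."""

    def __init__(self, prior, likelihood_func):
        # prior: dict mapping hypothesis -> prior probability
        # likelihood_func: callable(data, hypothesis) -> P(data | hypothesis)
        self.prior = prior
        self.likelihood_func = likelihood_func
        self.posterior = dict(prior)

    def update(self, data):
        # Bayes' rule: posterior is proportional to likelihood * prior, then renormalize.
        for hypothesis in self.posterior:
            self.posterior[hypothesis] *= self.likelihood_func(data, hypothesis)
        total = sum(self.posterior.values())
        if total > 0:
            for hypothesis in self.posterior:
                self.posterior[hypothesis] /= total

    def print_distribution(self):
        for hypothesis in sorted(self.posterior):
            print("{}: {:.4f}".format(hypothesis, self.posterior[hypothesis]))

Repeated calls to `update` keep multiplying new likelihoods into the stored posterior, which is why the die example can loop over a whole experiment and print the distribution after each roll.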
print("_" * 15 + "BAYES CLASSIFIER" + "_" * 15)
print_menu()
classifier = None
while True:
    command = input("Enter command:")
    command = command.lower()
    # Train clause
    if command.startswith('t'):
        classifier = pp.main()
    # Load training clause
    elif command.startswith('l'):
        print("Loading: ", end='')
        if classifier is None:
            classifier = Bayes(trained=True)
        else:
            classifier.load()
    # Save training clause
    elif command.startswith('s'):
        print("Saving: ", end='')
        if classifier is not None:
            classifier.save()
        else:
            print("Nothing to save")
    # Classify clause
    elif command.startswith('c'):
        if classifier is None:
            print("Load training first")
        else:
            path = command.split(" ")
        return p
    elif val == 'T':
        return 1 - p


'''Make a graph with 8 subplots that has the posterior for each of the following
scenarios. Make sure to give each graph a title!
    * You get the data: H
    * You get the data: T
    * You get the data: H, H
    * You get the data: T, H
    * You get the data: H, H, H
    * You get the data: T, H, T
    * You get the data: H, H, H, H
    * You get the data: T, H, T, H'''
bayes_uniform = Bayes(prior_dict.copy(), likelihood_func=likelihood)
bayes_uniform.update('H')

fig, axs = plt.subplots(4, 2, figsize=(14, 8))
scenarios = ['H', 'T', ['H', 'H'], ['T', 'H'], ['H', 'H', 'H'], ['T', 'H', 'T'],
             ['H', 'H', 'H', 'H'], ['T', 'H', 'T', 'H']]
i = 1
for scenario, ax in zip(scenarios, axs.flatten()):
    bayes = Bayes(prior_dict.copy(), likelihood_func=likelihood)
    for flip in scenario:
        bayes.update(flip)
    bayes.plot(ax, title='Scenario #' + str(i) + ': ' + ', '.join(scenario))
    i += 1
plt.tight_layout()
plt.show()

'''On a single graph, use the coin.py random coin generator and overlay the initial
uniform prior with the prior after 1, 2, 10, 50 and 250 flips..
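The coin-flip exercise also calls `bayes.plot(ax, title=...)`, a plotting helper that is likewise not shown here. A minimal sketch of such a method, to be attached to the Bayes sketch above, might draw the posterior over the hypothesized coin biases as a bar chart; the axis labels and bar-chart choice are assumptions, not the original code.

def plot(self, ax, title=''):
    # Draw the current posterior over the hypotheses (e.g. coin biases p) as a bar chart.
    hypotheses = sorted(self.posterior)
    probabilities = [self.posterior[h] for h in hypotheses]
    ax.bar(range(len(hypotheses)), probabilities,
           tick_label=[str(h) for h in hypotheses])
    ax.set_title(title)
    ax.set_xlabel('hypothesis')
    ax.set_ylabel('posterior probability')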
import pandas
from matplotlib import pyplot
from bayes import Bayes
from utils import generate_datasets

data = pandas.read_csv('./datasets/leaf.csv')
labels = data["species"]
data.drop(data.columns[-1], axis=1, inplace=True)
print(data.index)

for dataset in generate_datasets(data, labels):
    print('\n' + dataset.name)
    for training_percent in range(60, 91, 5):
        classifier = Bayes(dataset.data, labels, training_percent)
        classifier.train()
        classifier.test()
        dataset.result.append(classifier.get_accuracy())
        print('Training percent: ' + str(training_percent) +
              '%, accuracy: ' + str(classifier.get_accuracy()))
    pyplot.plot(range(60, 91, 5), dataset.result, label=dataset.name)

pyplot.xlabel('Training percent')
pyplot.ylabel('Accuracy')
pyplot.legend()
pyplot.savefig('plot', dpi=200, bbox_inches='tight')
from bayes import Bayes

# First, create an instance of the algorithm and define
# which field/column you want to classify
instance = Bayes("Sex")

# Second, train the algorithm on a set of data
instance.learn("static/data_test.csv")

# Finally, use the trained instance to classify a set of data
# (in this example we find the most probable sex)
print(instance.classify([6, 130, 8]))
        die (int): number of sides of the die that produced the roll

    Returns:
        likelihood (float): the probability of the roll given the die.
    """
    if roll in range(1, die + 1):
        return 1 / die
    else:
        return 0


if __name__ == '__main__':
    uniform_prior = {4: .08, 6: .12, 8: .16, 12: .24, 20: .40}
    unbalanced_prior = {}
    die_bayes_1 = Bayes(uniform_prior.copy(), die_likelihood)
    experiment = [
        8, 2, 1, 2, 5, 8, 2, 4, 3, 7, 6, 5, 1, 6, 2, 5, 8, 8, 5, 3, 4, 2, 4, 3, 8,
        8, 7, 8, 8, 8, 5, 5, 1, 3, 8, 7, 8, 5, 2, 5, 1, 4, 1, 2, 1, 3, 1, 3, 1, 5
    ]
    experiment2 = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
    experiment3 = [20, 20, 20, 20, 20, 20, 20, 20, 20, 20]
    for idx, roll in enumerate(experiment3):
        print('roll #{}: die rolled = {}'.format(idx + 1, roll))
        die_bayes_1.update(roll)
        die_bayes_1.print_distribution()
        #unbalanced_prior = die_bayes_1.prior
        print('*' * 32)
from bayes import Bayes, URGENCIES
from sys import argv
import json
from signal import signal, SIGPIPE, SIG_DFL

signal(SIGPIPE, SIG_DFL)

f = open(argv[1], 'r')
json_str = f.read()
message_list = json.loads(json_str)

bae = Bayes(message_list)
bae.train()

test_data = open(argv[2], 'r')
test_data = test_data.read()
test_data = json.loads(test_data)


def prob_class(string, clazz):
    S = set(string.split())
    fv = bae.gen_feature_vector(S)
    return bae.prob_class(fv, clazz)


def main():
    tests = test_data
    out = {}
    for test in tests:
        o = {}
        for u in URGENCIES.keys():
def main():
    # Make a list of .txt files (per sentiment)
    print("\tLOADING FILES")
    path = Path('..').joinpath('Data')
    test_ = path.joinpath('test')
    train = path.joinpath('train')
    tp_reviews = txtToList(test_.joinpath('pos'))
    tn_reviews = txtToList(test_.joinpath("neg"))
    pos_reviews = txtToList(train.joinpath("pos"))
    neg_reviews = txtToList(train.joinpath("neg"))
    print("\tFILES LOADED")

    # Clean the reviews
    reviews = [pos_reviews, neg_reviews, tp_reviews, tn_reviews]
    print("\tCLEANING REVIEWS")
    for list_ in reviews:
        for i, review in enumerate(list_):
            list_[i] = clean_text(review)

    # Join the reviews into one string (per sentiment)
    pos_string = "".join([string for string in pos_reviews])
    neg_string = "".join([string for string in neg_reviews])

    # Count word frequencies (per sentiment and total)
    posCounter = Counter(pos_string.split())
    negCounter = Counter(neg_string.split())
    vocabCounter = Counter(pos_string.split() + neg_string.split())
    for term in list(posCounter):
        if posCounter[term] == 1:
            del posCounter[term]
    for term in list(negCounter):
        if negCounter[term] == 1:
            del negCounter[term]

    classifier = Bayes(vocab_counts=vocabCounter)
    classifier.train(posCounter, negCounter)

    testSets = [tp_reviews, tn_reviews]
    n_pos_tp, n_neg_tp = 0, 0
    n_pos_tn, n_neg_tn = 0, 0
    for i, testSet in enumerate(testSets):
        print("_" * 15 + "RESULTS" + "_" * 15)
        n_pos, n_neg = 0, 0
        for review in testSet:
            pos, neg = classifier.test(review)
            if pos >= neg:
                n_pos += 1
            else:
                n_neg += 1
        if i == 0:
            print("Positive Testset: ")
            n_pos_tp, n_neg_tp = n_pos, n_neg
        else:
            print("Negative Testset: ")
            n_pos_tn, n_neg_tn = n_pos, n_neg
        print("Positive reviews: {}".format(n_pos))
        print("Negative reviews: {}".format(n_neg))

    pos_prec = n_pos_tp / (n_pos_tp + len(tn_reviews) - n_neg_tn)
    pos_rec = n_pos_tp / len(tp_reviews)
    pos_f1 = 2 * ((pos_prec * pos_rec) / (pos_prec + pos_rec))
    neg_prec = n_neg_tn / (n_neg_tn + len(tp_reviews) - n_pos_tp)
    neg_rec = n_neg_tn / len(tn_reviews)
    neg_f1 = 2 * ((neg_prec * neg_rec) / (neg_prec + neg_rec))

    scores = [pos_prec, pos_rec, pos_f1, neg_prec, neg_rec, neg_f1]
    save_stats(scores)
    print_stats(scores)
    return classifier