def main():
    # Annotate the tables from each corpus and dump the labels as JSON.
    # (Python 2 code: xrange and print statements.)
    for corpus in ('hudong', 'baidu', 'wiki'):
        with open('../result/%s/dataTables.data' % corpus, 'r') as f:
            dataTables = pickle.load(f)
        auto_marks = []
        for nn, dataTable in enumerate(dataTables):
            mentions = []
            for i in xrange(dataTable.row):
                for j in xrange(dataTable.col):
                    mentions.append(dataTable[i][j])
            auto_mark = MachineLearning.main(mentions, dataTable.row, dataTable.col)
            auto_marks.append(auto_mark)
            # print auto_marks
            print u'Table %d annotated' % nn
        with open('../result/%s/taiyun/auto_mark.data' % corpus, 'w') as f:
            json.dump(auto_marks, f)
def svm_machine_learn_no_model(file_twin, file_kink, file_cluster,
                               filetrace_twin, filetrace_kink, filetrace_cluster,
                               filetrace, filename_mark, smooth=0):
    # No pretrained model is available: train an SVM from the twin/kink data.
    print('svm_machine_learn with no model is running')
    # Read the twin data file and extract the frequency-domain vector of every
    # useful signal.
    f_twin = Screening.read_file(file_twin)
    result1 = Screening.data_fre(f_twin, filetrace_twin, smooth)
    fre_range = result1[0]
    fre_twin = result1[1]
    # Read the kink data file.
    f_kink = Screening.read_file(file_kink)
    result2 = Screening.data_fre(
        f_kink, filetrace_kink, smooth)  # [[frequency range], [[fre1],[fre2],[fre3]...[fre-n]]]
    fre_kink = result2[1]
    # Read the data file that needs to be classified.
    f = Screening.read_file(file_cluster)
    result3 = Screening.data_fre(f, filetrace_cluster, smooth)
    fre = result3[1]
    # SVM training and classification.
    label_fre = MachineLearning.skl_svm(
        fre_twin, fre_kink, fre, filetrace, filename_mark)  # [cluster_label, data_fre]
    result = MachineLearning.ave_fre(
        label_fre, fre_range)  # [[frequency range], [[fre1_twin],[fre2_kink]]]
    temp = [['fre-range', 'twin', 'kink']]
    array = np.array([result[0], result[1][0], result[1][1]])
    array = array.T  # column 1: fre-range, column 2: twin, column 3: kink
    arr = array.tolist()
    for i in arr:
        temp.append(i)
    # Save the per-cluster average spectra.
    filename = 'SVM_averange_frequency.csv'
    f = filetrace + '\\' + 'File after Processing' + '\\' + filename
    np.savetxt(f, temp, fmt='%s', delimiter=',')
    print('SVM_averange_frequency File made')
    # Save the cluster labels.
    filename = 'SVM_label.csv'
    f = filetrace + '\\' + 'File after Processing' + '\\' + filename
    np.savetxt(f, label_fre[0], fmt='%s', delimiter=',')
    print('SVM_label File made')
    return label_fre[0]
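# A hypothetical invocation of svm_machine_learn_no_model, for illustration
# only: the paths, the '-run0' mark, and the pre-existing 'File after
# Processing' subfolder are assumptions, not values from the source.
base_dir = r'C:\AE\experiment1'
labels = svm_machine_learn_no_model(
    file_twin=base_dir + r'\twin.txt',
    file_kink=base_dir + r'\kink.txt',
    file_cluster=base_dir + r'\unlabeled.txt',
    filetrace_twin=base_dir,
    filetrace_kink=base_dir,
    filetrace_cluster=base_dir,
    filetrace=base_dir,
    filename_mark='-run0')
print('%d signals classified' % len(labels))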
def MakeModel():
    checkpoint_path = '/tmp/' + str(datetime.datetime.now())
    optimizer = keras.optimizers.Adam(lr=0.0006, beta_1=0.96, beta_2=0.99999,
                                      epsilon=1e-2)
    descriptor = 'Final training of model'
    filefmt = 'weights.Epoch-{epoch:03d};Loss-{val_loss:.6f}.hdf5'
    # Model = ML.SimpleModel(Train_data, optimizer)
    Model = ML.ModularModel(Train_data, optimizer, layers=4, nodes=4 * 256)
    History = ML.TrainModel(Model, Train_data, Train_label, EPOCHS=500,
                            min_delta=0.0, patience=20, PERIOD=0, BATCH=45,
                            val_data=(Test_data, Test_label),
                            checkpoint_path=checkpoint_path, file_name=filefmt,
                            Descriptor=descriptor)
    # Sanitize the timestamped checkpoint path for use as a directory name.
    save_dir = checkpoint_path.replace('.', ':').replace(':', '-') + '/'
    plot_dir = os.getcwd().replace('\\', '/') + save_dir
    ML.PlotHistory(History, save_path=save_dir)
    Predictions = ML.Predict(Model, Test_data)
    grph.PlotHistory(Predictions, Test_label, plot_dir)
    grph.PlotHistory2018(Predictions, Test_label, plot_dir)
    grph.PlotHistoryDiff(Predictions, Test_label, plot_dir)
    grph.PlotHistory2018percent(Predictions, Test_label, plot_dir)
    Offset = Predictions - Test_label
    OffsetP = Offset / Test_label * 100
    return History, Model, OffsetP, Offset
def draw_clusterings_kmeans(result, filetrace):
    label = result[0]
    n_cluster = len(set(label))
    X = np.array(result[1])
    # Reduce to two dimensions with PCA.
    X = MachineLearning.skl_pca(X, demen=2)
    x_standard = X[0]
    # Rescale the data to [0, 1].
    min_max_scaler = preprocessing.MinMaxScaler()
    x_standard = min_max_scaler.fit_transform(x_standard)
    # Group the points by cluster label.
    cluster = [[] for i in range(n_cluster)]
    for i in range(len(label)):
        for j in range(n_cluster):
            if label[i] == j:
                cluster[j].append(x_standard[i])
    # Save one file per cluster.
    filenumber = 0
    for i in cluster:
        filenumber = filenumber + 1
        filename = 'KMeans_cluster-cluter' + str(filenumber) + r'-Normalization.csv'
        f = filetrace + '\\' + 'File after Processing' + '\\' + filename
        np.savetxt(f, i, fmt='%s', delimiter=',')
    print('KMeans 2D Image File made!')
def draw_clusterings_svm(result, filetrace, filename_mark):
    label = result[0]
    print(len(label))
    X = np.array(result[1])
    # Reduce to two dimensions with PCA.
    X = MachineLearning.skl_pca(X, demen=2)
    x_standard = X[0]
    # Rescale the data to [0, 1].
    min_max_scaler = preprocessing.MinMaxScaler()
    x_standard = min_max_scaler.fit_transform(x_standard)
    # Split the points into the twin (label 0) and kink (label 1) clusters.
    cluster_twin = []
    cluster_kink = []
    for i in range(len(label)):
        if label[i] == 0:
            cluster_twin.append(x_standard[i])
        if label[i] == 1:
            cluster_kink.append(x_standard[i])
    # Save the two clusters.
    filename1 = 'SVM_cluster-twin-Normalization'
    f = filetrace + '\\' + 'File after Processing' + '\\' + filename1 + filename_mark + '.csv'
    np.savetxt(f, cluster_twin, fmt='%s', delimiter=',')
    filename2 = 'SVM_cluster-kink-Normalization'
    f = filetrace + '\\' + 'File after Processing' + '\\' + filename2 + filename_mark + '.csv'
    np.savetxt(f, cluster_kink, fmt='%s', delimiter=',')
    print('SVM 2D Image File made!')
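# MachineLearning.skl_pca is not shown in this section; judging from the call
# sites above (skl_pca(X, demen=2), with the transformed data taken from index
# [0] of the result), it presumably wraps sklearn's PCA. A minimal sketch
# under that assumption:
from sklearn.decomposition import PCA

def skl_pca(X, demen=2):
    # Project X onto `demen` principal components; return the reduced data
    # first, so callers can index the result with [0].
    pca = PCA(n_components=demen)
    return [pca.fit_transform(X), pca]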
def model_test(file, filename, filetrace, filetrace_file_cluster, smooth):
    f = Screening.read_file(file)
    data_fre_range_fre = Screening.data_fre(
        f, filetrace_file_cluster, smooth)  # [[frequency range], [[fre1],[fre2],[fre3]...[fre-n]]]
    print(data_fre_range_fre[1])
    result = MachineLearning.svm_model(data_fre_range_fre[1], filetrace)
    file = filetrace + r'\Model Test' + '\\' + filename + '-Label.csv'
    np.savetxt(file, result[0], fmt='%s', delimiter=',')
    print('Model Test Over!')
    # The test file contains only twin signals, so the fraction predicted as
    # twin (label 0) is the accuracy.
    n_twin = 0
    n_kink = 0
    print(len(result[0]))
    for i in result[0]:
        if i == 0:
            n_twin = n_twin + 1
        if i == 1:
            n_kink = n_kink + 1
    # print('kink: ', n_kink)
    # print('twin: ', n_twin)
    accuracy = n_twin / (n_twin + n_kink)
    print('Accuracy: ', accuracy)
    txtfile = ['Accuracy: ' + str(accuracy)]
    file = filetrace + r'\Model Test' + '\\' + filename + '-Accuracy.txt'
    np.savetxt(file, txtfile, fmt='%s', delimiter=',')
def run(filename):
    print "Reading File"
    training, test = getimagelists(filename)
    features = []
    labels = []
    d = {}
    for line in training:
        print "Processing file: " + line.split(tab)[0]
        l, f, d = processtrainingimage.process(line, d)
        for i in range(len(f)):
            features.append(f[i])
        for i in range(len(l)):
            labels.append(l[i])
    o = open('output.txt', 'w')
    print "outputting"
    for i in xrange(len(labels)):
        output = labels[i]
        for j in xrange(len(features[i])):
            output += tab + str(features[i][j])
        o.write(output + '\n')
    o.close()  # close the feature dump before moving on
    print "Converting nested list to array"
    features = np.array(features, dtype=float)
    '''
    features, labels = loadFile("output.txt")
    '''
    print "Building Machine Learning Models"
    model = MachineLearning.ml(features, labels)
    print "Starting Testing Images!"
    for line in test:
        print "Processing file: " + line.split(tab)[0]
        ProcessTestImage.runWalk(line, 40, model)
def reproduction(self, qq0, qqe, tt, T):
    '''
    :param qq0: start point of the plan
    :param qqe: goal point of the plan
    :param tt: sample times along the plan
    :param T: integration step
    :return: planned positions at the sample times
    '''
    # Canonical system: phase variable s(t) = exp(-(alpha / tau) * t).
    ss = np.exp(-(self.alpha / self.tau) * tt)
    num = len(ss)
    # Evaluate the forcing term with the learned RBF network.
    f = np.zeros([num, self.m])
    for i in range(num):
        f[i, :] = ml.rbf_oput_nout(ss[i], self.c, self.sigma, self.w)
    self.f = f
    print f.shape
    # Integrate the transformed system to obtain the end positions.
    XX = np.zeros([num, self.m])
    x_dot = np.zeros(self.m)
    xx = np.copy(qq0)
    # Solve the second-order ODE iteratively, one step per sample.
    for i in range(num):
        for j in range(self.m):
            [xx[j], x_dot[j]] = dmps_solve_2(
                self.tau, self.k[j], self.d[j], qqe[j], qq0[j],
                ss[i], T, f[i, j], xx[j], x_dot[j])
            XX[i, j] = xx[j]
    self.xx = np.copy(XX)
    return self.xx
def kmeans_machine_learn(file, filetrace, filetrace_file_cluster, smooth, cluster):
    print('kmeans_machine_learn is running')
    f = Screening.read_file(file)
    # Extract the frequency-domain vector of every useful signal.
    frequency = Screening.data_fre(
        f, filetrace_file_cluster, smooth)  # [[frequency range], [[fre1],[fre2],[fre3]...[fre-n]]]
    fre_range = frequency[0]
    fre = frequency[1]
    # K-means clustering.
    label_fre = MachineLearning.skl_kmeans(
        fre, cluster=cluster)  # [cluster_label, data_fre]
    result = MachineLearning.kmeans_ave_fre(
        label_fre, fre_range, cluster
    )  # [[frequency range], [[fre1_cluster1],[fre2_cluster2],[fre3_cluster3],[fre4_cluster4], ...]]
    n = len(set(label_fre[0]))
    title = ['fre-range']
    for i in range(n):
        title.append(str(i))
    temp = [result[0]]
    for i in result[1]:
        temp.append(i)
    temp1 = [title]
    temp = np.transpose(temp).tolist()
    for i in temp:
        temp1.append(i)
    # Save the per-cluster average spectra.
    filename = 'Kmeans_averange_frequency.csv'
    f = filetrace + '\\' + 'File after Processing' + '\\' + filename
    np.savetxt(f, temp1, fmt='%s', delimiter=',')
    print('Kmeans_averange_frequency File made')
    # Save the cluster labels.
    filename = 'Kmeans_label.csv'
    f = filetrace + '\\' + 'File after Processing' + '\\' + filename
    np.savetxt(f, label_fre[0], fmt='%s', delimiter=',')
    print('Kmeans_label File made')
    # Draw the 2D view of the clustering result.
    # result = [label, [[cluster1],[cluster2],[cluster3],...]]
    DrawImage.draw_clusterings_kmeans(label_fre, filetrace)
    return label_fre[0]
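# MachineLearning.skl_kmeans is likewise not shown; from its call site above
# (skl_kmeans(fre, cluster=cluster) returning [cluster_label, data_fre]) a
# minimal sketch is possible, assuming it wraps sklearn's KMeans:
from sklearn.cluster import KMeans

def skl_kmeans(fre, cluster=4):
    # Cluster the spectra and return the label/data pair the callers index.
    labels = KMeans(n_clusters=cluster).fit_predict(fre)
    return [labels, fre]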
def bilibili_train():
    csv_path = r'../data/bilibili_data.csv'
    data = bilibili_read_data(csv_path)
    data = divide_data(data[0], data[1])
    variables = ['danmu', 'reply', 'favorite', 'coin', 'share', 'like']
    print(len(data.get('train_vecs')))
    print(len(data.get('train_exps')))
    train_vecs = data.get('train_vecs')
    train_exps = data.get('train_exps')
    MachineLearning.normalize_median(train_vecs, train_exps)
    model = MachineLearning.perceptron(variables=variables,
                                       train_vecs=train_vecs,
                                       train_exps=train_exps)
    model.train(train_iter_num=10000, rate=0.01)
def wine_train():
    csv_path = r'winequality-red.csv'
    input_vecs = []
    input_exps = []
    with open(csv_path, 'r', encoding='utf-8') as file:
        reader = csv.reader(file, delimiter=';')
        for line in reader:
            # Parse numeric fields; the last column is the quality score,
            # the rest are the features.
            line = [eval(_i) for _i in line]
            input_vecs.append(line[0:-1])
            input_exps.append(line[-1])
    features = [
        'fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
        'chlorides', 'free sulfur dioxide', 'total sulfur dioxide',
        'density', 'pH', 'sulphates', 'alcohol',
    ]
    variables = [0.0 for _ in features]
    MachineLearning.normalize_median(input_vecs, input_exps)
    model = MachineLearning.perceptron(variables=variables,
                                       train_vecs=input_vecs,
                                       train_exps=input_exps)
    model.train(train_iter_num=10000, rate=0.0001)
def svm_machine_learn_model(file_cluster, filetrace_cluster, filetrace,
                            draw=0, smooth=0):
    # A pretrained model is available: classify the new data with it.
    print('svm_machine_learn with model is running')
    # Read the data file that needs to be classified and extract the
    # frequency-domain vector of every useful signal.
    f = Screening.read_file(file_cluster)
    result = Screening.data_fre(f, filetrace_cluster, smooth)
    fre_range = result[0]
    fre = result[1]
    label_fre = MachineLearning.svm_model(
        fre, filetrace)  # [cluster_label, data_fre]
    result = MachineLearning.ave_fre(
        label_fre, fre_range)  # [[frequency range], [[fre1_twin],[fre2_kink]]]
    temp = [['fre-range', 'twin', 'kink']]
    array = np.array([result[0], result[1][0], result[1][1]])
    array = array.T  # column 1: fre-range, column 2: twin, column 3: kink
    arr = array.tolist()
    for i in arr:
        temp.append(i)
    # Save the per-cluster average spectra.
    filename = 'SVM_averange_frequency.csv'
    f = filetrace + '\\' + filename
    np.savetxt(f, temp, fmt='%s', delimiter=',')
    print('SVM_averange_frequency File made')
    # Save the cluster labels.
    filename = 'SVM_label.csv'
    f = filetrace + '\\' + filename
    np.savetxt(f, label_fre[0], fmt='%s', delimiter=',')
    print('SVM_label.csv File made')
    return label_fre[0]
def CVTest(test='layers', start=4, finish=14,
           optimizer=keras.optimizers.Adam(lr=0.001, beta_1=0.95,
                                           beta_2=0.999, epsilon=1e-4)):
    val_labels = Data_NoNAN.loc[:, 'Diesel':'Total']
    val_data = fcn.StandardizeData(Data_NoNAN.loc[:, 'Year':'Gust']).fillna(0)
    listOfErrors = ML.CrossValidation(data=val_data, labels=val_labels,
                                      test=test, start=start, finish=finish,
                                      optimizer=optimizer)
    return listOfErrors
def learn(self):
    # Invert the DMP transformed system to recover the forcing term
    # demonstrated by the trajectory (positions, velocities, and
    # accelerations in self.qq_qva, phase variable in self.ss).
    f_demo = np.zeros([self.num, self.n])
    for i in range(self.n):
        f_demo[:, i] = (self.tau * self.tau * self.qq_qva[:, i, 2] + self.d[i] * self.qq_qva[:, i, 1]) \
            / self.k[i] - (self.qq_qva[-1, i, 0] * np.ones(self.num) - self.qq_qva[:, i, 0]) \
            - (self.qq_qva[-1, i, 0] - self.qq_qva[0, i, 0]) * self.ss
    self.f_demo = f_demo
    # Fit the forcing term with an RBF network and solve for its parameters.
    # Centers: h of them over the phase range (the time variable has m = 1).
    c = np.linspace(self.ss[0], self.ss[-1], self.h)
    # Width (variance).
    sigma = abs(self.ss[0] - self.ss[-1])
    # Hidden-to-output weights, n x h.
    w = ml.rbf_weight_oput_nout(self.ss, c, sigma, f_demo)
    # Pack the RBF parameters into one array.
    rbf_param = np.zeros([self.n + 2, self.h])
    rbf_param[0, 0] = sigma         # row 1, first entry: the width sigma
    rbf_param[1, :] = c             # row 2: the centers
    rbf_param[2:self.n + 2, :] = w  # remaining rows: the weights
    self.rbf_param = rbf_param
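# For reference, the equations that learn() and reproduction() discretize,
# reconstructed from the code itself rather than from any accompanying text
# (per joint dimension, with goal g = q(T), start q0, gains k and d):
#
#     s(t) = exp(-(alpha / tau) * t)                        (canonical system)
#     tau^2 * q'' = k*(g - q) - d*q' - k*(g - q0)*s + k*f(s)  (transformed system)
#
# Solving the transformed system for the forcing term gives
#
#     f(s) = (tau^2 * q'' + d*q') / k - (g - q) - (g - q0)*s
#
# which is exactly what f_demo computes above from the demonstrated
# positions, velocities, and accelerations.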
def runExperiment(df, feature_sets, times, batch_size):
    tick = time.time()
    outputs = []
    for feature_set in feature_sets:
        classifiers = dict(
            online=onlineML.getOnlineClassifiers(),
            offline=offlineML.getOfflineClassifiers(df.shape[1])
        )
        y, X = ml.getDataForML(df, features=feature_set,
                               feature_predict=feature_predict, sampling=False)
        n_features = X.shape[1]
        print 'total features ', n_features
        print 'total samples ', X.shape[0]
        times['preparing_time'] += time.time() - tick
        # Online classifiers: 5-fold offline evaluation, then online batches.
        cls_stats = offlineML.runOfflineML(y, X, classifiers['online'])
        outputs.append([cls_stats, classifiers['online'], feature_set, '5fold'])
        cls_stats = onlineML.runOnlineML(y, X, classifiers['online'],
                                         batch_size=batch_size)
        outputs.append([cls_stats, classifiers['online'], feature_set,
                        str(batch_size) + 'batch'])
        # Offline classifiers: same two evaluations.
        cls_stats = offlineML.runOfflineML(y, X, classifiers['offline'])
        outputs.append([cls_stats, classifiers['offline'], feature_set, '5fold'])
        cls_stats = onlineML.runOnlineML(y, X, classifiers['offline'],
                                         batch_size=batch_size)
        outputs.append([cls_stats, classifiers['offline'], feature_set,
                        str(batch_size) + 'batch'])
        # saveClassificationResults(DIR + 'results/accuracy_ML.csv', output)
        # theanoTest(y, X)
        pt.plotEverything(cls_stats, times, len(X))
    return outputs
def main():
    reduced_filename = KNMI.PATH[:KNMI.PATH.rindex('.')] + ".csv"
    df = pd.read_csv(reduced_filename)
    trn, dev, tst = ml.Lq_Fit.seperate_trn_dev_tst(df)
    final_filename = KNMI.PATH[:KNMI.PATH.rindex('.')] + "_final.csv"
    df_final = pd.read_csv(final_filename)
    att = input("Which attribute do you want to analyse? ")
    att = att.upper()
    print("You asked for:", KNMI.attributes[att])
    uni.att_values(df, att)
    uni.boxplot_att(df, att, save=False)
    uni.histogram_att(df, att, save=False)
    plot_att_year(df, [], att)
    plot_att_year_bok(df, [], att)
    plot_att_month(df, [], att)
    for other_att in MEAN_ATTS:
        print("\nFinding correlation of", KNMI.attributes[att],
              "with", KNMI.attributes[other_att])
        print("Correlation is", df_final[att].corr(df[other_att]))
        plot_att_conditional(df, [], att, other_att)
        choice = ""
        while choice != "y" and choice != "n" and choice != "s":
            choice = input("Do you want regression over these two "
                           "attributes?\nyes (y), no (n), "
                           "yes with switched axis (s): ")
            choice = choice.lower()
        if choice == "y":
            poly = ml.try_poly_fit(trn, dev, att, other_att)
            ml.plot_poly(tst, poly, att, other_att)
        elif choice == "s":
            poly = ml.try_poly_fit(trn, dev, other_att, att)
            ml.plot_poly(tst, poly, other_att, att)
def machine_learning():
    algo_list = ["knn", "svm", "gbc", "rfc", "nn"]
    # Check the request arguments.
    if request.args.get('images_directory') is None:
        return 'No "images_directory" given.'
    if request.args.get('algorithm') is None:
        return 'No "algorithm" given.'
    if request.args.get('save_directory') is None:
        return 'No "save_directory" given.'
    images_directory = request.args.get('images_directory')
    algorithm = str(request.args.get('algorithm'))
    save_directory = request.args.get('save_directory')
    # Create a new MachineLearning object (the neural network also takes a
    # batch size).
    if algorithm == "nn":
        ml = MachineLearning(images_directory, save_directory, 32)
    else:
        ml = MachineLearning(images_directory, save_directory)
    # Error detection.
    if len(ml.imgs) == 0 or len(ml.labels) == 0:
        app.logger.error("No images were read!")
        return "Error: No images were read!"
    if algorithm in algo_list:
        score_train, score_test = ml.train(algorithm, ml.imgs, ml.labels)
    else:
        app.logger.warning("Unexpected algorithm choice, choosing default!")
        algorithm = "svm"
        score_train, score_test = ml.train(algorithm, ml.imgs, ml.labels)
    return ('"' + algorithm + '":{"train_acc":' + str(score_train) +
            ' ,"val_acc":' + str(score_test) + '}')
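# The Flask route decorator is not shown above. Assuming the handler is
# mounted at /machine_learning on a local development server, a hypothetical
# client call would look like this (the endpoint URL and the directory paths
# are made up for illustration):
import requests

resp = requests.get('http://localhost:5000/machine_learning', params={
    'images_directory': '/data/images',
    'algorithm': 'svm',
    'save_directory': '/data/models',
})
print(resp.text)  # e.g. "svm":{"train_acc":0.98 ,"val_acc":0.91}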
def __init__(self, network, config, reward_engine):
    super().__init__(network, config)
    self.epoch_counter = self.counters[config['AgentEpochCounter']]
    self.iter_counter = self.counters[config['AgentIterationCounter']]
    # Placeholders
    self.temp = tf.placeholder(shape=[1], dtype=tf.float32)
    self.reward_holder = tf.placeholder(shape=[1], dtype=tf.float32)
    self.action_holder = tf.placeholder(shape=[1], dtype=tf.int32)
    # Algorithm: temperature-scaled softmax policy and policy-gradient loss.
    self.output = tf.reshape(self.output_layer, [-1])
    self.prob_dist = tf.nn.softmax(self.output / self.temp)
    self.weight = tf.slice(self.output, self.action_holder, [1])
    self.loss = -(tf.math.log(self.weight) * self.reward_holder)
    self.optimizer = tf.train.AdamOptimizer(
        learning_rate=self.config['AgentLearningRate'])
    self.update = self.optimizer.minimize(self.loss)
    # Processors
    self.exploration = ML.Exploration(self)
    self.exp_buffer = ML.ExperienceBuffer(self)
    self.state_space = ML.StateSpace(self)
    self.action_space = ML.ActionSpace(self)
    self.reward_engine = ML.RewardEngine(self, reward_engine)
    self.recorder = ML.Recorder(self)
# Creating the MLP and majority-vote classifiers
mlp = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 3),
                    random_state=1)
majority = VotingClassifier(estimators=[("svm", svm1), ("rf", rf), ("mlp", mlp)],
                            voting="hard")

# Cross-validation iterators
skf = StratifiedKFold(n_splits=5)
loo = LeaveOneOut()

# Outlier detectors
angleBased = abod.ABOD(method="fast")
isolationForrest = iforest.IForest(n_estimators=10, behaviour="new")
kNearestNeighbors = knn.KNN(method="median", n_neighbors=5)
detector = kNearestNeighbors

# Datasets
cardboard = ml.parseCsv("C:\\Users\\Luke\\Documents\\GitHub\\UREP_Cancer_Detection_Array_Microwave_Sensor\\results\\Deltas\\DDeltas.csv")
wood = ml.parseCsv("C:\\Users\\Luke\\Documents\\GitHub\\UREP_Cancer_Detection_Array_Microwave_Sensor\\results\\Deltas\\EDeltas.csv")
plastic = ml.parseCsv("C:\\Users\\Luke\\Documents\\GitHub\\UREP_Cancer_Detection_Array_Microwave_Sensor\\results\\Deltas\\BDeltas.csv")
plastic = plastic + ml.parseCsv("C:\\Users\\Luke\\Documents\\GitHub\\UREP_Cancer_Detection_Array_Microwave_Sensor\\results\\Deltas\\FDeltas.csv")

# Standard expected values
expected = [[0 for x in range(50)], [1 for x in range(50)]]

# Removing outliers
cardboard = ml.removeOutliers(cardboard, detector, True)
wood = ml.removeOutliers(wood, detector, True)
plastic = ml.removeOutliers(plastic, detector, True)

# Creating expected values after outlier removal
cardboardEV = [[0 for i in range(len(cardboard))], [1 for i in range(len(cardboard))]]
# cv2.imshow("origin", img)
# Read the trackbar parameters.
BLCSZ = 2 * cv2.getTrackbarPos('BLOCKSIZE', 'image') + 3
Csize = cv2.getTrackbarPos('C', 'image')
# img1 = iPP.BeBinary(target)
if k == -1:
    # img = cv2.imread('D:/new.png', 0)
    # img = cv2.imread('D:/PProject/pic.png', 0)
    # e1 = cv2.getTickCount()
    ret, bkp = iPP.PreProcess(img, 33, 20, 1)
    cv2.imshow("origin", bkp)
    # wxbmp = wx.BitmapFromBuffer(720, 1280, bkp)
    # e2 = cv2.getTickCount()
    # t = (e2 - e1) / cv2.getTickFrequency()
    # print(t)
    if ret:
        result = ML.ocr()
        resultnow = ''
        resultnow += str(int(result[0][0]))
        resultnow += str(int(result[1][0]))
        resultnow += str(int(result[2][0]))
        # Only display a reading once it is stable across two frames.
        if resultnow == resultlast:
            cv2.putText(bkp, resultnow, org, fontFace, fontScale,
                        fontcolor, thickness, lineType)
            content_text.SetValue(resultnow)
            frame0.Show()
        resultlast = resultnow
print("NaN's in data set after standardization: " + str(Data_Stand.isnull().sum().sum())) Data_NoNAN = fcn.CreateCapData(Data_NoNAN) if not os.path.isfile(PC.GraphPath + 'ScatterPlot_Norm.png'): print('Normalized scatterplot not existing, creating it') grph.ScatterMatrix(PC.GraphPath, Data_Stand, 'ScatterPlot_Norm') Data_NoNAN.describe() #if (not os.path.isfile(PC.DataPath+'Trainlab.pkl') or # not os.path.isfile(PC.DataPath+'Traindat.pkl') or # not os.path.isfile(PC.DataPath+'Testdat.pkl') or # not os.path.isfile(PC.DataPath+'Testdat.pkl')): Train_label, Train_data, Test_label, Test_data = ML.Split(Data_NoNAN) # Train_label.to_pickle(PC.DataPath+'Trainlab.pkl') # Train_data.to_pickle(PC.DataPath+'Traindat.pkl') # Test_label.to_pickle(PC.DataPath+'Testlab.pkl') # Test_data.to_pickle(PC.DataPath+'Testdat.pkl') #else: # Train_label = pd.read_pickle(PC.DataPath+'Trainlab.pkl') # Train_data = pd.read_pickle(PC.DataPath+'Traindat.pkl') # Test_label = pd.read_pickle(PC.DataPath+'Testlab.pkl') # Test_data = pd.read_pickle(PC.DataPath+'Testdat.pkl') # Standardize the data based on mean and std dev of train data # Standardize test data first, so that it doesn't standardize # on already standardized data Test_data = fcn.StandardizeData(Test_data, std_Data=Train_data).fillna(0)
def main():
    # Set seed
    np.random.seed(0)

    # Create the data frames from files
    all_patients = pd.read_csv("data/all_pats.csv")
    all_visits = pd.read_csv("data/all_visits.csv")
    all_updrs = pd.read_csv("data/all_updrs.csv")
    all_updrs_subcomponents = pd.read_csv("data/itemizedDistributionOfUPDRSMeaning_Use.csv")

    # Enrolled PD / Control patients
    pd_control_patients = all_patients.loc[
        ((all_patients["DIAGNOSIS"] == "PD") | (all_patients["DIAGNOSIS"] == "Control")) & (
                all_patients["ENROLL_STATUS"] == "Enrolled"), "PATNO"].unique()

    # Data for these patients
    pd_control_data = all_visits[all_visits["PATNO"].isin(pd_control_patients)]

    # Merge with UPDRS scores
    pd_control_data = pd_control_data.merge(all_updrs[["PATNO", "EVENT_ID", "TOTAL"]],
                                            on=["PATNO", "EVENT_ID"], how="left")

    # Get rid of nulls for UPDRS
    pd_control_data = pd_control_data[pd_control_data["TOTAL"].notnull()]

    # Merge with patient info
    pd_control_data = pd_control_data.merge(all_patients, on="PATNO", how="left")

    # TODO: Merge patient's SC features onto baseline if times are close

    # Only include baseline and subsequent visits
    pd_control_data = pd_control_data[
        (pd_control_data["EVENT_ID"] != "ST") & (pd_control_data["EVENT_ID"] != "U01") &
        (pd_control_data["EVENT_ID"] != "PW") & (pd_control_data["EVENT_ID"] != "SC")]

    # Encode to numeric
    mL.clean_data(data=pd_control_data, encode_auto=["GENDER.x", "DIAGNOSIS", "HANDED"],
                  encode_man={
                      "EVENT_ID": {"BL": 0, "V01": 1, "V02": 2, "V03": 3, "V04": 4,
                                   "V05": 5, "V06": 6, "V07": 7, "V08": 8, "V09": 9,
                                   "V10": 10, "V11": 11, "V12": 12}})

    # TODO: Optimize flexibility with NAs
    # Eliminate features with more than 20% NAs
    for feature in pd_control_data.keys():
        if len(pd_control_data.loc[pd_control_data[feature].isnull(), feature]) / len(
                pd_control_data[feature]) > 0.2:
            pd_control_data = pd_control_data.drop(feature, 1)

    # TODO: Rethink this
    # Eliminate features with more than 30% NA at baseline
    for feature in pd_control_data.keys():
        if len(pd_control_data.loc[
                   (pd_control_data["EVENT_ID"] == 0) & (pd_control_data[feature].isnull()),
                   feature]) / len(pd_control_data[pd_control_data["EVENT_ID"] == 0]) > 0.3:
            pd_control_data = pd_control_data.drop(feature, 1)

    # TODO: Imputation
    # Drop rows with NAs
    pd_control_data = pd_control_data.dropna()

    # Drop duplicates (keep first, delete others)
    pd_control_data = pd_control_data.drop_duplicates(subset=["PATNO", "EVENT_ID"])

    # Drop patients without BL data
    for patient in pd_control_data["PATNO"].unique():
        if patient not in pd_control_data.loc[pd_control_data["EVENT_ID"] == 0, "PATNO"].unique():
            pd_control_data = pd_control_data[pd_control_data["PATNO"] != patient]

    # Select all features in the data set
    all_data_features = list(pd_control_data.columns.values)

    for updrs_subcomponent in all_updrs_subcomponents["colname"].tolist():
        print(updrs_subcomponent)
        for i in range(0, 4):
            if all_updrs_subcomponents.loc[
                all_updrs_subcomponents["colname"] == updrs_subcomponent,
                "use{}".format(i)].min() == 1:
                # Generate features (and update all features list)
                train = generate_features(data=pd_control_data, features=all_data_features,
                                          file="data/PPMI_train.csv", action=True,
                                          updrs_subsets=True, time=True, future=False,
                                          milestones=True, slopes=False,
                                          score_name=updrs_subcomponent,
                                          milestone_feature=updrs_subcomponent,
                                          milestone_value=i)

                # Initialize predictors as all features
                predictors = list(train.columns.values)

                # Initialize which features to drop from predictors
                drop_predictors = ["PATNO", "EVENT_ID", "INFODT", "INFODT.x", "ORIG_ENTRY",
                                   "LAST_UPDATE", "PAG_UPDRS3", "PRIMDIAG", "COMPLT",
                                   "INITMDDT", "INITMDVS", "RECRUITMENT_CAT", "IMAGING_CAT",
                                   "ENROLL_DATE", "ENROLL_CAT", "ENROLL_STATUS", "BIRTHDT.x",
                                   "GENDER.y", "APPRDX", "GENDER", "CNO", "TIME_FUTURE",
                                   "TIME_NOW", "SCORE_FUTURE", "SCORE_SLOPE",
                                   "TIME_OF_MILESTONE", "TIME_UNTIL_MILESTONE", "BIRTHDT.y",
                                   "TIME_SINCE_DIAGNOSIS", "TIME_SINCE_FIRST_SYMPTOM",
                                   "TIME_FROM_BL"]

                # List of UPDRS components
                updrs_components = ["NP1COG", "NP1HALL", "NP1DPRS", "NP1ANXS", "NP1APAT",
                                    "NP1DDS", "NP1SLPN", "NP1SLPD", "NP1PAIN", "NP1URIN",
                                    "NP1CNST", "NP1LTHD", "NP1FATG", "NP2SPCH", "NP2SALV",
                                    "NP2SWAL", "NP2EAT", "NP2DRES", "NP2HYGN", "NP2HWRT",
                                    "NP2HOBB", "NP2TURN", "NP2TRMR", "NP2RISE", "NP2WALK",
                                    "NP2FREZ", "PAG_UPDRS3", "NP3SPCH", "NP3FACXP", "NP3RIGN",
                                    "NP3RIGRU", "NP3RIGLU", "PN3RIGRL", "NP3RIGLL", "NP3FTAPR",
                                    "NP3FTAPL", "NP3HMOVR", "NP3HMOVL", "NP3PRSPR", "NP3PRSPL",
                                    "NP3TTAPR", "NP3TTAPL", "NP3LGAGR", "NP3LGAGL", "NP3RISNG",
                                    "NP3GAIT", "NP3FRZGT", "NP3PSTBL", "NP3POSTR", "NP3BRADY",
                                    "NP3PTRMR", "NP3PTRML", "NP3KTRMR", "NP3KTRML", "NP3RTARU",
                                    "NP3RTALU", "NP3RTARL", "NP3RTALL", "NP3RTALJ", "NP3RTCON"]

                # Drop UPDRS components
                # drop_predictors.extend(updrs_components)

                # Drop unwanted features from predictors list
                for feature in drop_predictors:
                    if feature in predictors:
                        predictors.remove(feature)

                # Target for the model
                target = "TIME_UNTIL_MILESTONE"

                # Algs for model
                # Grid search (futures): n_estimators=50, min_samples_split=75, min_samples_leaf=50
                # Futures: n_estimators=150, min_samples_split=100, min_samples_leaf=25
                # Grid search (slopes): 'min_samples_split': 75, 'n_estimators': 50, 'min_samples_leaf': 25
                algs = [
                    RandomForestRegressor(n_estimators=150, min_samples_split=100,
                                          min_samples_leaf=25, oob_score=True),
                    LogisticRegression(),
                    SVC(probability=True),
                    GaussianNB(),
                    MultinomialNB(),
                    BernoulliNB(),
                    KNeighborsClassifier(n_neighbors=25),
                    GradientBoostingClassifier(n_estimators=10, max_depth=3)]

                # Alg names for model
                alg_names = ["Random Forest", "Logistic Regression", "SVM",
                             "Gaussian Naive Bayes", "Multinomial Naive Bayes",
                             "Bernoulli Naive Bayes", "kNN", "Gradient Boosting"]

                # TODO: Configure ensemble
                # Ensemble
                ens = mL.ensemble(algs=algs, alg_names=alg_names,
                                  ensemble_name="Weighted ensemble of RF, LR, SVM, GNB, KNN, and GB",
                                  in_ensemble=[True, True, True, True, False, False, True, True],
                                  weights=[3, 2, 1, 3, 1, 3], voting="soft")

                # Add ensemble to algs and alg_names
                # algs.append(ens["alg"])
                # alg_names.append(ens["name"])

                # Parameters for grid search
                grid_search_params = [{"n_estimators": [50, 150, 300, 500, 750, 1000],
                                       "min_samples_split": [4, 8, 25, 50, 75, 100],
                                       "min_samples_leaf": [2, 8, 15, 25, 50, 75, 100]}]

                # Cross-validated r2
                metrics1 = mL.metrics(data=train, predictors=predictors, target=target,
                                      algs=algs, alg_names=alg_names, cross_val=[True],
                                      scoring="r2")
                all_updrs_subcomponents.loc[
                    all_updrs_subcomponents["colname"] == updrs_subcomponent,
                    "over{}_r2".format(i)] = metrics1["Cross Validation r2"]

                # Cross-validated RMSE
                metrics2 = mL.metrics(data=train, predictors=predictors, target=target,
                                      algs=algs, alg_names=alg_names, cross_val=[True],
                                      scoring="root_mean_squared_error")
                all_updrs_subcomponents.loc[
                    all_updrs_subcomponents["colname"] == updrs_subcomponent,
                    "over{}_rmse".format(i)] = metrics2["Cross Validation root_mean_squared_error"]

    all_updrs_subcomponents.to_csv("data/updrs_subcomponents_scores.csv")
def model_origin_data(file_twin, file_kink, filetrace_twin, filetrace_kink,
                      filetrace, filename_mark, smooth):
    # Load the saved SVM and score the original twin/kink data against their
    # known labels.
    print('svm_machine_learn with no model is running')
    # Read the twin data file and extract the frequency-domain vector of
    # every useful signal.
    f_twin = Screening.read_file(file_twin)
    result1 = Screening.data_fre(f_twin, filetrace_twin, smooth)
    fre_range = result1[0]
    fre_twin = result1[1]
    # Read the kink data file.
    f_kink = Screening.read_file(file_kink)
    result2 = Screening.data_fre(
        f_kink, filetrace_kink, smooth)  # [[frequency range], [[fre1],[fre2],[fre3]...[fre-n]]]
    fre_kink = result2[1]
    # Merge the data and build the ground-truth labels (twin = 0, kink = 1).
    label_true = [0 for i in range(len(fre_twin))]
    for i in range(len(fre_kink)):
        label_true.append(1)
    fre = fre_twin
    for i in fre_kink:
        fre.append(i)
    # Test the model accuracy.
    model_path = filetrace + '\\' + r'Model\train0_model.m'
    model_svm = joblib.load(model_path)
    label_cluster = model_svm.predict(fre)
    accuracy = accuracy_score(label_true, label_cluster)
    print('accuracy: ', accuracy)
    print('y_test: ', label_true)
    print('y_predicted: ', list(label_cluster))
    # Write one file per predicted cluster.
    cluster_twin = []
    cluster_kink = []
    for i in range(len(label_cluster)):
        if label_cluster[i] == 0:
            cluster_twin.append(fre[i])
        if label_cluster[i] == 1:
            cluster_kink.append(fre[i])
    filename = 'SVM_cluster-twin'
    f = filetrace + '\\' + 'File after Processing' + '\\' + filename + filename_mark + '.csv'
    np.savetxt(f, cluster_twin, fmt='%s', delimiter=',')
    filename = 'SVM_cluster-kink'
    f = filetrace + '\\' + 'File after Processing' + '\\' + filename + filename_mark + '.csv'
    np.savetxt(f, cluster_kink, fmt='%s', delimiter=',')
    print('Twin-kink File made')
    # Build the average-frequency table.
    label_fre = [label_cluster, fre]  # [cluster_label, data_fre]
    result = MachineLearning.ave_fre(
        label_fre, fre_range)  # [[frequency range], [[fre1_twin],[fre2_kink]]]
    temp = [['fre-range', 'twin', 'kink']]
    array = np.array([result[0], result[1][0], result[1][1]])
    array = array.T  # column 1: fre-range, column 2: twin, column 3: kink
    arr = array.tolist()
    for i in arr:
        temp.append(i)
    # Save the table.
    filename = 'SVM_averange_frequency'
    f = filetrace + '\\' + 'File after Processing' + '\\' + filename + filename_mark + '.csv'
    np.savetxt(f, temp, fmt='%s', delimiter=',')
    print('SVM_averange_frequency-origin_data File made')
    # Draw the 2D visualization.
    result1 = [label_cluster, fre]
    DrawImage.draw_clusterings_svm(result1, filetrace, filename_mark)
def PlayGame(opponent):
    global board
    board = Functions.BoardInit()
    player = 1
    UpdateBoard(player)
    if (opponent == "Human"):
        player = randint(1, 2)
        movesAvailable = 1
        gamePlaying = 1
        while gamePlaying != 0:
            if (Functions.GameOver(board) == 1):
                gamePlaying = 0
                score = Functions.BlackWhiteCount(board)
                if (score > 0):
                    winner = 1
                elif (score < 0):
                    winner = 2
                else:
                    winner = 0
                if (winner != 0):
                    print("Game Over! The winner is player ", winner)
                else:
                    print("It's a tie!")
                board = Functions.BoardInit()
            cont.set(0)
            window.wait_variable(cont)
            player = Functions.nextPlayer(board, player)
            time.sleep(0.25)
    if (opponent == "Rand"):
        while Functions.GameOver(board) == 0:
            player = randint(1, 2)
            movesAvailable = 1
            gamePlaying = 1
            while gamePlaying != 0:
                UpdateBoard(player)
                if (Functions.GameOver(board) == 1):
                    gamePlaying = 0
                    score = Functions.BlackWhiteCount(board)
                    if (score > 0):
                        winner = 1
                    elif (score < 0):
                        winner = 2
                    else:
                        winner = 0
                    if (winner != 0):
                        print("Game Over! The winner is player ", winner)
                    else:
                        print("It's a tie!")
                    board = Functions.BoardInit()
                movesAvailable = Functions.MovesAvailable(board, player)
                cont.set(0)
                if (player == 1):
                    window.wait_variable(cont)
                    player = Functions.nextPlayer(board, player)
                    time.sleep(0.25)
                while (player == 2):
                    movesAvailable = Functions.MovesAvailable(board, player)
                    print("movesAvailable", movesAvailable)
                    if (movesAvailable):
                        numPicked = randint(0, len(movesAvailable) - 1)
                        print("numPicked", numPicked)
                        movePicked = movesAvailable[numPicked]
                        print("movePicked from movesAvailable", movePicked[0], movePicked[1])
                        x = int(movePicked[1])
                        y = int(movePicked[0])
                        board = Functions.MakeMove(board, y, x, player)
                    player = Functions.nextPlayer(board, player)
    elif (opponent == "MinMax"):
        while Functions.GameOver(board) == 0:
            player = randint(1, 2)
            UpdateBoard(player)
            movesAvailable = 1
            gamePlaying = 1
            while gamePlaying != 0:
                UpdateBoard(player)
                if (Functions.GameOver(board) == 1):
                    gamePlaying = 0
                    score = Functions.BlackWhiteCount(board)
                    if (score > 0):
                        winner = 1
                    elif (score < 0):
                        winner = 2
                    else:
                        winner = 0
                    if (winner != 0):
                        print("Game Over! The winner is player ", winner)
                    else:
                        print("It's a tie!")
                    board = Functions.BoardInit()
                movesAvailable = Functions.MovesAvailable(board, player)
                cont.set(0)
                if (player == 1):
                    window.wait_variable(cont)
                    player = Functions.nextPlayer(board, player)
                    time.sleep(0.25)
                while (player == 2):
                    nextX = 0
                    nextY = 0
                    movesAvailable = Functions.MovesAvailable(board, player)  # all available moves
                    bestPred = 0
                    j = 0
                    while (j <= len(movesAvailable) - 1 and movesAvailable):
                        # For each move in movesAvailable, score the resulting board.
                        moveTest = movesAvailable[j]
                        x = int(moveTest[1])
                        y = int(moveTest[0])
                        testBoard = Functions.BoardCopy(board)
                        testBoard = Functions.MakeMove(testBoard, y, x, player)
                        pred = np.sum(Functions.boardToNN(testBoard, player))  # score of an available move
                        # Keep the move with the lowest score: a score of 1 means
                        # player 1 is guaranteed to win, so player 2 minimizes.
                        if (j == 0):
                            bestPred = pred
                            nextX = x
                            nextY = y
                        else:
                            if (pred < bestPred):
                                bestPred = pred
                                nextX = x
                                nextY = y
                        j += 1
                    board = Functions.MakeMove(board, nextY, nextX, player)
                    player = Functions.nextPlayer(board, player)
    elif (opponent == "Network"):
        ops.reset_default_graph()
        numInp = 65  # the number of inputs for the neural network
        numLabel = 1  # the number of inputs for the labels
        # Create placeholders for input and label
        inp, label = ml.placeholders(numInp, numLabel)
        # Initialise parameters
        parameters = ml.initialiseParameters()
        # Make the neural network
        out = ml.network(inp, parameters)
        modelPath = "./save/NetworkPlayer"
        # Initialise all the variables
        init = tf.global_variables_initializer()
        saver = tf.train.Saver()
        with tf.Session() as sess:
            sess.run(init)
            # Load the network in, or initialise a new one.
            try:
                saver.restore(sess, modelPath)
                # print("Model restored from file: %s" % modelPath)
            except:
                # sessctd = False
                print("Initialising")
            while Functions.GameOver(board) == 0:
                board = Functions.BoardInit()
                player = randint(1, 2)
                UpdateBoard(player)
                movesAvailable = 1
                gamePlaying = 1
                while gamePlaying != 0:
                    UpdateBoard(player)
                    if (Functions.GameOver(board) == 1):
                        gamePlaying = 0
                        score = Functions.BlackWhiteCount(board)
                        if (score > 0):
                            winner = 1
                        elif (score < 0):
                            winner = 2
                        else:
                            winner = 0
                        if (winner != 0):
                            print("Game Over! The winner is player ", winner)
                        else:
                            print("It's a tie!")
                        board = Functions.BoardInit()
                    movesAvailable = Functions.MovesAvailable(board, player)
                    cont.set(0)
                    if (player == 1):
                        window.wait_variable(cont)
                        player = Functions.nextPlayer(board, player)
                        time.sleep(0.25)
                    while (player == 2):
                        nextX = 0
                        nextY = 0
                        movesAvailable = Functions.MovesAvailable(board, player)  # all available moves
                        bestPred = 0
                        j = 0
                        while (j <= len(movesAvailable) - 1 and movesAvailable):
                            # For each move in movesAvailable, score the resulting board.
                            moveTest = movesAvailable[j]
                            x = int(moveTest[1])
                            y = int(moveTest[0])
                            testBoard = Functions.BoardCopy(board)
                            testBoard = Functions.MakeMove(testBoard, y, x, player)
                            pred = out.eval(feed_dict={
                                inp: Functions.boardToNN(testBoard, player)
                            })  # network output of an available move
                            # Keep the move with the lowest output: an output of 1
                            # means player 1 is guaranteed to win, so player 2
                            # minimizes.
                            if (j == 0):
                                bestPred = pred
                                nextX = x
                                nextY = y
                            else:
                                if (pred < bestPred):
                                    bestPred = pred
                                    nextX = x
                                    nextY = y
                            j += 1
                        board = Functions.MakeMove(board, nextY, nextX, player)
                        player = Functions.nextPlayer(board, player)
def training(opponent, batches):
    import Functions
    import MachineLearning as ml
    import tensorflow as tf
    import numpy as np
    import time
    from random import random, randint
    # from tensorflow import ops

    tf.reset_default_graph()
    lr = 0.0001  # the learning rate
    numInp = 65  # the number of inputs for the neural network
    numLabel = 1  # the number of inputs for the labels
    batchSize = 50  # the size of the batches
    discountRate = 0.99  # the discount rate for temporal-difference learning
    # Create placeholders for input and label
    inp, label = ml.placeholders(numInp, numLabel)
    # Initialise parameters
    parameters = ml.initialiseParameters()
    # Make the neural network
    out = ml.network(inp, parameters)
    # Cost function for use in training
    cost = ml.computeCost(out, label)
    # Training function
    optimiser = tf.train.AdamOptimizer(learning_rate=lr).minimize(cost)
    player1 = "Network"
    player2 = opponent
    # Initialise all the variables
    init = tf.global_variables_initializer()
    progstart = time.time()
    saver = tf.train.Saver()
    expRate = 0.2
    p1wins = 0  # number of games player 1 has won overall
    p2wins = 0  # number of games player 2 has won overall
    modelPath = "./save/NetworkPlayer"
    # WLTD1step
    # testNetScore or testWinLoss
    i = 1
    while i <= batches:
        s = 1
        winLoss = np.ndarray((1, 1))
        board = Functions.BoardInit()
        boardArray = Functions.boardToNN(board, 1)
        batchp1wins = 0
        batchp2wins = 0
        labelArray = np.ndarray((1, 1))
        labelArray[0][0] = 0
        with tf.Session() as sess:
            sess.run(init)
            # Load the network in, or initialise a new one.
            try:
                saver.restore(sess, modelPath)
            except:
                print("Initialising")
            start = time.time()
            while s <= batchSize:
                movesAvailable = 1
                gamePlaying = 1
                player = randint(1, 2)  # 1 or 2: determines the starting player
                board = Functions.BoardInit()
                nnBoard = Functions.boardToNN(board, player)
                boardArray = np.concatenate((boardArray, nnBoard), axis=1)
                gameLabelArray = np.ndarray((1, 1))
                while gamePlaying != 0:
                    # Player 1 is a network.
                    while ((player1 == "Network" and player == 1)
                           and Functions.GameOver(board) == 0):
                        nextX = 0
                        nextY = 0
                        movesAvailable = Functions.MovesAvailable(board, player)  # all available moves
                        bestPred = 0
                        j = 0
                        if (random() > expRate):
                            # Most of the time the network plays the move with
                            # the highest output.
                            while (j <= len(movesAvailable) - 1 and movesAvailable):
                                # For each move in movesAvailable
                                moveTest = movesAvailable[j]
                                x = int(moveTest[1])
                                y = int(moveTest[0])
                                testBoard = Functions.BoardCopy(board)
                                testBoard = Functions.MakeMove(testBoard, y, x, player)
                                pred = out.eval(feed_dict={
                                    inp: Functions.boardToNN(testBoard, player)
                                })  # the output of an available move
                                # Find the move with the highest output; an output
                                # of 1 means player 1 is guaranteed to win.
                                if (j == 0):
                                    bestPred = pred
                                    nextX = x
                                    nextY = y
                                else:
                                    if (pred > bestPred):
                                        bestPred = pred
                                        nextX = x
                                        nextY = y
                                j += 1
                        elif (movesAvailable):
                            # Sometimes the network plays a move completely at
                            # random, to better explore all possible moves.
                            move = movesAvailable[randint(0, len(movesAvailable) - 1)]
                            nextX = int(move[1])
                            nextY = int(move[0])
                            testBoard = Functions.BoardCopy(board)
                            testBoard = Functions.MakeMove(testBoard, nextY, nextX, player)
                            bestPred = out.eval(feed_dict={
                                inp: Functions.boardToNN(testBoard, player)
                            })  # the output of the move
                        winLoss[0][0] = bestPred * discountRate  # label = output * discount rate
                        gameLabelArray = np.concatenate((gameLabelArray, winLoss), axis=1)  # add the label to the list of labels
                        board = Functions.MakeMove(board, nextY, nextX, player)  # update the board
                        nnBoard = Functions.boardToNN(board, player)
                        if (Functions.GameOver(board) == 1):
                            # The game is over: the final label is the game result.
                            result = Functions.BlackWhiteCount(board)  # find the winner
                            winLoss[0][0] = result
                            # if (result > 0):
                            #     winLoss[0][0] = 1
                            # elif (result == 0):
                            #     winLoss[0][0] = 0
                            # elif (result < 0):
                            #     winLoss[0][0] = -1
                            gameLabelArray = np.concatenate((gameLabelArray, winLoss), axis=1)
                            boardArray = np.concatenate((boardArray, nnBoard), axis=1)
                        else:
                            player = Functions.nextPlayer(board, player)  # find who plays next
                            boardArray = np.concatenate((boardArray, nnBoard), axis=1)
                    # Player 2 plays randomly.
                    while ((player2 == "Rand" and player == 2)
                           and Functions.GameOver(board) == 0):
                        movesAvailable = Functions.MovesAvailable(board, player)  # all available moves
                        if (movesAvailable):
                            # Pick and play a move at random.
                            numPicked = randint(0, len(movesAvailable) - 1)
                            movePicked = movesAvailable[numPicked]
                            x = int(movePicked[1])
                            y = int(movePicked[0])
                            board = Functions.MakeMove(board, y, x, player)
                        # Add the resulting position to the board array and the
                        # label array.
                        nnBoard = Functions.boardToNN(board, player)
                        bestPred = out.eval(feed_dict={inp: nnBoard})
                        winLoss[0][0] = bestPred * discountRate
                        gameLabelArray = np.concatenate((gameLabelArray, winLoss), axis=1)
                        if (Functions.GameOver(board) == 1):
                            result = Functions.BlackWhiteCount(board)  # find the winner
                            winLoss[0][0] = result
                            gameLabelArray = np.concatenate((gameLabelArray, winLoss), axis=1)
                            boardArray = np.concatenate((boardArray, nnBoard), axis=1)
                        if (Functions.GameOver(board) == 0):
                            player = Functions.nextPlayer(board, player)  # find who plays next
                            boardArray = np.concatenate((boardArray, nnBoard), axis=1)
                    # Player 2 is a network.
                    while ((player2 == "Network" and player == 2)
                           and Functions.GameOver(board) == 0):
                        nextX = 0
                        nextY = 0
                        movesAvailable = Functions.MovesAvailable(board, player)  # all available moves
                        bestPred = 0
                        j = 0
                        if (random() > expRate):
                            # Most of the time the network plays the move with
                            # the lowest output: an output of -1 means player 2
                            # is guaranteed to win.
                            while (j <= len(movesAvailable) - 1 and movesAvailable):
                                moveTest = movesAvailable[j]
                                x = int(moveTest[1])
                                y = int(moveTest[0])
                                testBoard = Functions.BoardCopy(board)
                                testBoard = Functions.MakeMove(testBoard, y, x, player)
                                pred = out.eval(feed_dict={
                                    inp: Functions.boardToNN(testBoard, player)
                                })  # the output of an available move
                                if (j == 0):
                                    bestPred = pred
                                    nextX = x
                                    nextY = y
                                else:
                                    if (pred < bestPred):
                                        bestPred = pred
                                        nextX = x
                                        nextY = y
                                j += 1
                        elif (movesAvailable):
                            # Sometimes the network plays a move completely at random.
                            move = movesAvailable[randint(0, len(movesAvailable) - 1)]
                            nextX = int(move[1])
                            nextY = int(move[0])
                            testBoard = Functions.BoardCopy(board)
                            testBoard = Functions.MakeMove(testBoard, nextY, nextX, player)
                            bestPred = out.eval(feed_dict={
                                inp: Functions.boardToNN(testBoard, player)
                            })  # the output of the move
                        winLoss[0][0] = bestPred * discountRate  # label = output * discount rate
                        gameLabelArray = np.concatenate((gameLabelArray, winLoss), axis=1)
                        board = Functions.MakeMove(board, nextY, nextX, player)  # update the board
                        nnBoard = Functions.boardToNN(board, player)
                        if (Functions.GameOver(board) == 1):
                            result = Functions.BlackWhiteCount(board)  # find the winner
                            winLoss[0][0] = result
                            gameLabelArray = np.concatenate((gameLabelArray, winLoss), axis=1)
                            boardArray = np.concatenate((boardArray, nnBoard), axis=1)
                        else:
                            player = Functions.nextPlayer(board, player)  # find who plays next
                            boardArray = np.concatenate((boardArray, nnBoard), axis=1)
                    if (Functions.GameOver(board) == 1):
                        # The game ended. Reshape the label array and drop its
                        # first value, so the label for each board state equals
                        # the output of the next move times the discount rate.
                        concArray = np.copy(gameLabelArray[0][1:])
                        size = len(concArray)
                        concArray = np.reshape(concArray, (1, size))
                        labelArray = np.concatenate((labelArray, concArray), axis=1)
                        gamePlaying = 0
                        Winner = Functions.BlackWhiteCount(board)
                        if (Winner > 0):
                            batchp1wins += 1
                            p1wins += 1
                        elif (Winner < 0):
                            batchp2wins += 1
                            p2wins += 1
                        s += 1
            print("batch: ", i, " out of ", batches)
            print("player 1 (", player1, ") wins ", batchp1wins)
            print("player 2 (", player2, ") wins ", batchp2wins)
            # Flip the training data: a position where player 2 is guaranteed
            # to win is a position where player 1 would be guaranteed to win
            # if every piece were reversed.
            labelArrayOpp = Functions.ReverseArray(labelArray)
            boardArrayOpp = Functions.ReverseArray(boardArray)
            labelArray = np.concatenate((labelArray, labelArrayOpp), axis=1)
            boardArray = np.concatenate((boardArray, boardArrayOpp), axis=1)
            # Train the neural network.
            _, boardCost = sess.run([optimiser, cost],
                                    feed_dict={inp: boardArray, label: labelArray})
            print(boardCost)
            # savePath = saver.save(sess, modelPath)  # saves the updated network
        end = time.time()
        print("batch time(secs) ", end - start)
        i += 1
    progend = time.time()
    print("total games: ", batches * batchSize)
    print("player 1 (", player1, ") wins ", p1wins)
    print("player 2 (", player2, ") wins ", p2wins)
    print("total time(secs) ", progend - progstart)
import MachineLearning as ML

if __name__ == '__main__':
    SVM = ML.ML_SVM(True)
    # Parameters for the discharge search
    diaAnalizarIni = '2016-01-01 00:00:00'
    diaAnalizarFin = '2016-04-01 00:00:00'
    # coordenadaAnalizar = '-57.606765,-25.284659'  # Asuncion2
    coordenadaAnalizar = '-57.58762493212727,-25.362657878768985'  # Asuncion2
    # coordenadaAnalizar = '-54.842809,-25.459519'  # Ciudad del Este, Aeropuerto Guarani
    # coordenadaAnalizar = '-55.873211,-27.336775'  # Encarnacion - Playa San Jose
    tiempoIntervalo = 10  # minutes
    diametroAnalizar = '45000'  # in meters
    SVM.RecorrerYGenerar(diaAnalizarIni, diaAnalizarFin, coordenadaAnalizar,
                         tiempoIntervalo, diametroAnalizar)
def main():
    # Set seed
    np.random.seed(0)

    # Create the training/test set(s) from file(s)
    train = pd.read_csv("data/all_visits_practice_2.csv")

    # Preliminary data diagnostics
    mL.describe_data(data=train, describe=True, info=True,
                     value_counts=["ONOFF", "NP3BRADY"],
                     description="PRELIMINARY DATA DIAGNOSTICS:")

    # Encode EVENT_ID to numeric
    mL.clean_data(data=train,
                  encode_man={"EVENT_ID": {"SC": 0, "V04": 4, "V06": 6, "V10": 10}})

    # Choose On or Off
    train = train[train["ONOFF"] == 0]

    # Remove the class with only a single sample
    train = train[train.NP3BRADY != 4]

    # Predictors for the model
    predictors = ["TIME_PASSED", "VISIT_NOW", "CAUDATE_R", "CAUDATE_L",
                  "PUTAMEN_R", "PUTAMEN_L", "SCORE_NOW"]

    # Target for the model
    target = "SCORE_NEXT"

    # Generate new features
    train = generate_features(data=train, predictors=predictors, target=target,
                              id_name="PATNO", score_name="NP3BRADY",
                              visit_name="EVENT_ID")

    # Value counts for EVENT_ID after feature generation
    mL.describe_data(data=train, info=True, describe=True,
                     value_counts=["VISIT_NOW", "SCORE_NEXT"],
                     description="AFTER FEATURE GENERATION:")

    # Univariate feature selection
    mL.describe_data(data=train, univariate_feature_selection=[predictors, target])

    # Algs for model
    algs = [RandomForestClassifier(n_estimators=1000, min_samples_split=50,
                                   min_samples_leaf=2, oob_score=True),
            LogisticRegression(),
            SVC(probability=True),
            GaussianNB(),
            MultinomialNB(),
            BernoulliNB(),
            KNeighborsClassifier(n_neighbors=25),
            GradientBoostingClassifier(n_estimators=10, max_depth=3)]

    # Alg names for model
    alg_names = ["Random Forest", "Logistic Regression", "SVM",
                 "Gaussian Naive Bayes", "Multinomial Naive Bayes",
                 "Bernoulli Naive Bayes", "kNN", "Gradient Boosting"]

    # Parameters for grid search
    grid_search_params = [{"n_estimators": [50, 500, 1000],
                           "min_samples_split": [25, 50, 75],
                           "min_samples_leaf": [2, 15, 25, 50]}]

    # Ensemble
    ens = mL.ensemble(algs=algs, alg_names=alg_names,
                      ensemble_name="Weighted ensemble of RF, LR, SVM, GNB, KNN, and GB",
                      in_ensemble=[True, True, True, True, False, False, True, True],
                      weights=[3, 2, 1, 3, 1, 3], voting="soft")

    # Add ensemble to algs and alg_names
    algs.append(ens["alg"])
    alg_names.append(ens["name"])

    # Display ensemble metrics
    mL.metrics(data=train, predictors=predictors, target=target, algs=algs,
               alg_names=alg_names, feature_importances=[True], base_score=[True],
               oob_score=[True],
               cross_val=[True] * 9,
               split_accuracy=[True] * 9,
               split_classification_report=[False] * 8 + [True],
               split_confusion_matrix=[False] * 8 + [True])
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Dec 7 15:15:35 2018

@author: aantoniadis
"""

import New_data
import MachineLearning

######## SETTINGS ##############

# Choose the ML model: linear, ridge, ridgecv, multitasklasso,
# multitaskelasticnet (recommended: ridge).
regression = 'ridge'
# Choose 'yes' to apply RV correction to the spectra, or 'no' if they are
# already corrected.
do_rv_cor = 'yes'

################################

New_data.EWmeasurements(do_rv_cor)
MachineLearning.ML(regression)
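# MachineLearning.ML is not shown here; the option names above suggest a
# dispatch onto sklearn.linear_model estimators. A hypothetical sketch of
# that mapping (the helper name make_regressor is made up for illustration):
from sklearn import linear_model

_MODELS = {
    'linear': linear_model.LinearRegression,
    'ridge': linear_model.Ridge,
    'ridgecv': linear_model.RidgeCV,
    'multitasklasso': linear_model.MultiTaskLasso,
    'multitaskelasticnet': linear_model.MultiTaskElasticNet,
}

def make_regressor(regression='ridge'):
    # Instantiate the estimator selected by the `regression` setting.
    return _MODELS[regression]()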
import time
from datetime import datetime, timedelta

import moviepy.editor as mpy
import numpy as np
import pandas as pd
from scipy.spatial import ConvexHull

import MachineLearning as ML
from util import DatabaseConnection as db
from util import PlotData as plt

# Default settings
writeAnalisis = True  # whether to write a .csv with the analysis conclusion and summary

if __name__ == '__main__':
    SVM = ML.ML_SVM(False)
    inicio_de_tiempo = time.time()

    # TEST ANALYSIS DATA
    diaAnalizarIni = datetime.strptime('2016-01-25 14:00:00', '%Y-%m-%d %H:%M:%S')
    diaAnalizarFin = datetime.strptime('2016-01-25 15:30:00', '%Y-%m-%d %H:%M:%S')
    # coordenadaAnalizar = '-57.606765,-25.284659'  # Asuncion
    coordenadaAnalizar = '-55.873211,-27.336775'  # Encarnacion - Playa San Jose
    tiempoIntervalo = 10  # minutes

    # REAL-TIME ANALYSIS DATA
    # diaAnalizarIni = datetime.now() - timedelta(minutes=15)
    # diaAnalizarFin = datetime.now()
def machine_learning(self):
    print(m.ml_task())
import MachineLearning

testing_params = [
    [6.7, 0.76, 0.02, 1.8, 0.078, 6, 12, 0.996, 3.55, 0.63, 9.95],      # C_WINE
    [6.6, 0.61, 0.01, 1.9, 0.08, 8, 25, 0.99746, 3.69, 0.73, 10.5],     # B_WINE
    [10.7, 0.52, 0.38, 2.6, 0.066, 29, 56, 0.99577, 3.15, 0.79, 12.1],  # A_WINE
    [9.5, 0.72, 0.24, 2.3, 0.07, 21, 47, 0.9962, 3.54, 0.70, 11.3]      # ?
]

print("\nTesting values against the SVM Linear model")
response = MachineLearning.execute_model('wines_svm_linear', testing_params)
print(response)

print("\nTesting values against the SVM RBF model")
response = MachineLearning.execute_model('wines_svm_rbf', testing_params)
print(response)

print("\nTesting values against the Random Forest model")
response = MachineLearning.execute_model('wines_rf', testing_params)
print(response)
def run(preprocess_data, cohorts, target, score_name, feature_elimination_n,
        gen_filename, gen_action, gen_updrs_subsets, gen_time, gen_future,
        gen_milestones, gen_milestone_features_values, gen_slopes,
        predictors_filename, predictors_action, feature_importance_n,
        grid_search_action, grid_search_results, print_results, results_filename,
        prediction_range, range_target, range_target_description, add_predictors,
        drop_predictors):
    # Initialize empty add_predictors
    if add_predictors is None:
        add_predictors = []

    # Data keys
    data_keys = ["PATNO", "EVENT_ID", "INFODT", "PDDXDT", "SXDT", "BIRTHDT.x",
                 "HAS_PD", target]

    # Target keys
    target_keys = [score_name] if gen_future or gen_slopes else [
        x[0] for x in gen_milestone_features_values] if gen_milestones else []

    # Add target keys to data keys
    data_keys.extend(target_keys)

    # TODO: Create data_preprocessing() function for all of this data preprocessing
    if preprocess_data:
        # Create the data frames from files
        with np.warnings.catch_warnings():
            np.warnings.simplefilter("ignore")
            all_patients = pd.read_csv("data/all_pats.csv")
            all_visits = pd.read_csv("data/all_visits.csv")
            all_updrs = pd.read_csv("data/all_updrs.csv")

        # Enrolled cohorts patients
        pd_control_patients = all_patients.loc[
            (np.bitwise_or.reduce(np.array([(all_patients["APPRDX"] == cohort)
                                            for cohort in cohorts]))) & (
                    all_patients["ENROLL_STATUS"] == "Enrolled"), "PATNO"].unique()

        # Data for these patients
        pd_control_data = all_visits[all_visits["PATNO"].isin(pd_control_patients)].merge(
            all_updrs[["PATNO", "EVENT_ID", "TOTAL"]], on=["PATNO", "EVENT_ID"],
            how="left").merge(all_patients, on="PATNO", how="left", suffixes=["_x", ""])

        # Only include "off" data
        pd_control_data = pd_control_data[pd_control_data["PAG_UPDRS3"] == "NUPDRS3"]

        # # Merge SC data onto BL data
        # sc_bl_merge = pd_control_data[pd_control_data["EVENT_ID"] == "BL"].merge(
        #     pd_control_data[pd_control_data["EVENT_ID"] == "SC"], on="PATNO",
        #     how="left", suffixes=["", "_SC_ID"])
        #
        # # Remove SC data that already belongs to BL
        # pd_control_data.loc[pd_control_data["EVENT_ID"] == "BL"] = sc_bl_merge.drop(
        #     [col for col in sc_bl_merge.columns if col[-6:] == "_SC_ID"], axis=1).values
        #
        # # Initiate progress
        # prog = Progress(0, len(pd_control_data["PATNO"].unique()),
        #                 "Merging Screening Into Baseline", print_results)
        #
        # # Use SC data where BL is null
        # for patient in pd_control_data["PATNO"].unique():
        #     if not pd_control_data[(pd_control_data["PATNO"] == patient) &
        #                            (pd_control_data["EVENT_ID"] == "SC")].empty:
        #         for column in pd_control_data.keys():
        #             if (pd_control_data.loc[(pd_control_data["PATNO"] == patient) & (
        #                     pd_control_data["EVENT_ID"] == "BL"), column].isnull().values.all()) and (
        #                     pd_control_data.loc[(pd_control_data["PATNO"] == patient) & (
        #                             pd_control_data["EVENT_ID"] == "SC"), column].notnull().values.any()):
        #                 pd_control_data.loc[
        #                     (pd_control_data["PATNO"] == patient) &
        #                     (pd_control_data["EVENT_ID"] == "BL"), column] = \
        #                     max(pd_control_data.loc[
        #                         (pd_control_data["PATNO"] == patient) &
        #                         (pd_control_data["EVENT_ID"] == "SC"), column].tolist())
        #     # Update progress
        #     prog.update_progress()

        # Remove SC rows
        pd_control_data = pd_control_data[pd_control_data["EVENT_ID"] != "SC"]

        # Drop duplicates based on PATNO and EVENT_ID, keep only first
        pd_control_data = pd_control_data.drop_duplicates(subset=["PATNO", "EVENT_ID"],
                                                          keep="first")

        # Encode to numeric
        mL.clean_data(data=pd_control_data, encode_auto=["HANDED", "PAG_UPDRS3"],
                      encode_man={
                          "EVENT_ID": {"BL": 0, "V01": 1, "V02": 2, "V03": 3, "V04": 4,
                                       "V05": 5, "V06": 6, "V07": 7, "V08": 8, "V09": 9,
                                       "V10": 10, "V11": 11, "V12": 12}})

        # Create HAS_PD column
        pd_control_data["HAS_PD"] = 0
        pd_control_data.loc[(pd_control_data["APPRDX"] == "PD") |
                            (pd_control_data["APPRDX"] == "GRPD") |
                            (pd_control_data["APPRDX"] == "GCPD"), "HAS_PD"] = 1

        # Convert remaining categorical data to binary columns
        numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
        dummy_features = [item for item in pd_control_data.columns.values if item not in list(
            pd_control_data.select_dtypes(include=numerics).columns.values) + drop_predictors]
        pd_control_data = pd.get_dummies(pd_control_data, columns=dummy_features)

        # Controls have missing PDDXDT and SXDT; set them to an arbitrary date
        pd_control_data.loc[pd_control_data["HAS_PD"] == 0, "PDDXDT"] = pd.to_datetime("1/1/1800")
        pd_control_data.loc[pd_control_data["HAS_PD"] == 0, "SXDT"] = pd.to_datetime("1/1/1800")

        pd_control_data.to_csv("data/PPMI_Clean_Data.csv", index=False)
    else:
        # Use preprocessed data
        pd_control_data = pd.read_csv("data/PPMI_Clean_Data.csv")

    # Convert to correct dtypes
    pd_control_data[["PATNO", "EVENT_ID"]] = pd_control_data[["PATNO", "EVENT_ID"]].apply(
        pd.to_numeric, errors="coerce")

    if predictors_action:
        if print_results:
            print("Optimizing Predictors . . .")
        # Drop unused columns
        for column in pd_control_data.keys():
            if (column in drop_predictors) and (column not in data_keys):
                pd_control_data = pd_control_data.drop(column, 1)
    else:
        # Drop unused columns
        pd_control_data = pd_control_data[list(
            set(add_predictors + data_keys) & set(pd_control_data.columns.values.tolist()))]

    if print_results:
        # Print number of patients and features before feature elimination
        print("BEFORE FEATURE ELIMINATION: Patients: {}, Features: {}".format(
            len(pd_control_data[pd_control_data["EVENT_ID"] == 0]),
            len(pd_control_data.keys())))

    pd_control_data.to_csv("TEST.csv")

    # Perform optimal feature elimination
    if feature_elimination_n is None:
        feature_elimination_n = max([x / 1000 for x in range(25, 1000, 25)],
                                    key=lambda n: feature_row_selection(
                                        pd_control_data, n, data_keys, target_keys,
                                        True, True))
        if print_results:
            print("\rFeature Elimination N: {}\n".format(feature_elimination_n))

    # Feature/row elimination
    pd_control_data = feature_row_selection(pd_control_data, feature_elimination_n,
                                            data_keys, target_keys)

    if (not predictors_action) and print_results:
        # Print number of patients and features after feature elimination
        print("AFTER FEATURE ELIMINATION: Patients: {}, Features: {}".format(
            len(pd_control_data[pd_control_data["EVENT_ID"] == 0]),
            len(pd_control_data.keys())))

    # Select all features in the data set
    all_data_features = list(pd_control_data.columns.values)

    pd_control_data.to_csv("testttttt.csv")

    # Generate features (and update all features list)
    train = generate_features(data=pd_control_data, features=all_data_features,
                              filename=gen_filename, action=gen_action,
                              updrs_subsets=gen_updrs_subsets, time=gen_time,
                              future=gen_future, milestones=gen_milestones,
                              slopes=gen_slopes, score_name=score_name,
                              milestone_features_values=gen_milestone_features_values,
                              progress=(not predictors_action) and print_results)

    if (not predictors_action) and print_results:
        # Data diagnostics after feature generation
        mL.describe_data(data=train, describe=True,
                         description="AFTER FEATURE GENERATION:")

    # Parameters for grid search
    grid_search_params = [{"n_estimators": [50, 150, 300, 500, 750, 1000],
                           "min_samples_split": [4, 8, 25, 50, 75, 100],
                           "min_samples_leaf": [2, 8, 15, 25, 50, 75, 100]}]

    # Algs for model
    # Grid search (futures): n_estimators=50, min_samples_split=75, min_samples_leaf=50
    # Futures: n_estimators=150, min_samples_split=100, min_samples_leaf=25
    # Grid search (slopes): 'min_samples_split': 75, 'n_estimators': 50, 'min_samples_leaf': 25
    # Futures: 'min_samples_leaf': 100, 'min_samples_split': 25, 'n_estimators': 50
    # Newest Futures: {'n_estimators': 500, 'min_samples_leaf': 2, 'min_samples_split': 4}
    # TRMR: {'n_estimators': 150, 'min_samples_leaf': 2, 'min_samples_split': 8}
    # Slopes: {'n_estimators': 500, 'min_samples_split': 25, 'min_samples_leaf': 2}
    algs = [
        RandomForestRegressor(n_estimators=500, min_samples_split=4,
                              min_samples_leaf=2, oob_score=True)
        if target != "SCORE_SLOPE" else
        RandomForestClassifier(n_estimators=500, min_samples_split=25,
                               min_samples_leaf=2, oob_score=True),
        LogisticRegression(),
        SVC(probability=True),
        GaussianNB(),
        MultinomialNB(),
        BernoulliNB(),
        KNeighborsClassifier(n_neighbors=25),
        GradientBoostingClassifier(n_estimators=10, max_depth=3)]

    # Alg names for model
    alg_names = ["Random Forest", "Logistic Regression", "SVM", "Gaussian Naive Bayes",
                 "Multinomial Naive Bayes", "Bernoulli Naive Bayes", "kNN",
                 "Gradient Boosting"]

    # TODO: Configure ensemble
    # Ensemble
    ens = mL.ensemble(algs=algs, alg_names=alg_names,
                      ensemble_name="Weighted ensemble of RF, LR, SVM, GNB, KNN, and GB",
                      in_ensemble=[True, True, True, True, False, False, True, True],
                      weights=[3, 2, 1, 3, 1, 3], voting="soft")

    # Add ensemble to algs and alg_names
    # algs.append(ens["alg"])
    # alg_names.append(ens["name"])

    if predictors_action:
        # Initialize predictors as all numeric features
        numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
        predictors = list(train.select_dtypes(include=numerics).columns.values)

        # Drop unwanted features from predictors list
        for feature in drop_predictors:
            if feature in predictors:
                predictors.remove(feature)

        # If grid search action, use grid search estimator
        if grid_search_action:
            algs[0] = mL.metrics(data=train, predictors=predictors, target=target,
                                 algs=algs, alg_names=alg_names,
                                 scoring="r2" if target != "SCORE_SLOPE" else "accuracy",
                                 grid_search_params=grid_search_params,
                                 output=True)["Grid Search Random Forest"].best_estimator_

        train[predictors + ["PATNO"]].to_csv("test_yay_delete.csv")

        # Get feature importances
        feature_importances = mL.metrics(data=train, predictors=predictors, target=target,
                                         algs=algs, alg_names=alg_names,
                                         feature_importances=[True], output=True,
                                         description=None)["Feature Importances Random Forest"]

        # Set important features as predictors
        predictors = [x for x, y in feature_importances if y >= feature_importance_n]

        # Use predictors plus added predictors
        add_predictors.extend(predictors)

        # Output predictors to file
        pd.DataFrame({"predictors": predictors}).to_csv(predictors_filename, index=False)

        # Run with new predictors
        run(False, cohorts, target, score_name, feature_elimination_n, gen_filename,
            gen_action, gen_updrs_subsets, gen_time, gen_future, gen_milestones,
            gen_milestone_features_values, gen_slopes, predictors_filename, False,
            feature_importance_n, grid_search_action, grid_search_results,
            print_results, results_filename, prediction_range, range_target,
            range_target_description, add_predictors, drop_predictors)
    else:
        # Get predictors from file
        predictors = add_predictors

        # Create file of training data
        train[predictors].to_csv("data/PPMI_train.csv")

        # Grid search
        if grid_search_action or grid_search_results:
            # Compute grid search
            grid_search = mL.metrics(data=train, predictors=predictors, target=target,
                                     algs=algs, alg_names=alg_names,
                                     scoring="r2" if target
!= "SCORE_SLOPE" else "accuracy", grid_search_params=grid_search_params, output=True) # If grid search action, use grid search estimator if grid_search_action: algs[0] = grid_search["Grid Search Random Forest"].best_estimator_ # Univariate feature selection # mL.describe_data(data=train, univariate_feature_selection=[predictors, target]) # Display metrics, including r2 score metrics = mL.metrics(data=train, predictors=predictors, target=target, algs=algs, alg_names=alg_names, feature_importances=[True], base_score=[True], oob_score=[True], cross_val=[True], scoring="r2", output=not print_results) # feature_dictionary=[data_dictionary, "FEATURE", "DSCR"]) # Display mean absolute error score metrics.update(mL.metrics(data=train, predictors=predictors, target=target, algs=algs, alg_names=alg_names, cross_val=[True], scoring="mean_absolute_error", description=None, output=not print_results)) # Display root mean squared error score metrics.update(mL.metrics(data=train, predictors=predictors, target=target, algs=algs, alg_names=alg_names, cross_val=[True], scoring="root_mean_squared_error", description=None, output=not print_results)) metrics["Cross Validation accuracy Random Forest"] = None # Metrics for classification if target == "SCORE_SLOPE": # Display classification accuracy metrics.update(mL.metrics(data=train, predictors=predictors, target=target, algs=algs, alg_names=alg_names, cross_val=[True], scoring="accuracy", description=None, output=not print_results)) # Display confusion matrix mL.metrics(data=train, predictors=predictors, target=target, algs=algs, alg_names=alg_names, split_confusion_matrix=[True], description=None, output=not print_results) # If grid search results, print results if grid_search_results: print(grid_search["Grid Search String Random Forest"]) if not print_results: # Write results to file results = pd.DataFrame( columns=[prediction_range, "description", "base", "oob", "r2", "mes", "rmse", "accuracy", "features", "importances"]) results.loc[0, prediction_range] = range_target results.loc[0, "description"] = range_target_description results.loc[0, "base"] = metrics["Base Score Random Forest"] results.loc[0, "oob"] = metrics["OOB Score Random Forest"] results.loc[0, "r2"] = metrics["Cross Validation r2 Random Forest"] results.loc[0, "mes"] = metrics["Cross Validation mean_absolute_error Random Forest"] results.loc[0, "rmse"] = metrics["Cross Validation root_mean_squared_error Random Forest"] results.loc[0, "accuracy"] = metrics["Cross Validation accuracy Random Forest"] feature_importances = list(metrics["Feature Importances Random Forest"]) results.loc[0, "features"] = feature_importances[0][0] results.loc[0, "importances"] = feature_importances[0][1] for feature, importance in feature_importances[1:]: index = results.index.max() + 1 results.loc[index, "features"] = feature results.loc[index, "importances"] = importance results.to_csv(results_filename, mode="a", header=False, index=False)
import pandas
import config
from sklearn.utils import resample

import MachineLearning


def binary_to_char(value):
    return 'N' if value == 0 else 'Y'


ml = MachineLearning.MachineLearning()

data = pandas.read_csv(config.DATA_PATH + 'ortopedia.csv', sep=';')

# Convert the classification column to character labels
data.Fusao_de_Vertebras = data.Fusao_de_Vertebras.map(binary_to_char)

# Check the class balance
print(data.Fusao_de_Vertebras.value_counts())

# Split the data by class
minor = data[data['Fusao_de_Vertebras'] == 'Y']
major = data[data['Fusao_de_Vertebras'] == 'N']

# Upsample the class with fewer records
minor_up_sample = resample(minor, replace=True, n_samples=7900, random_state=None)

# Create a new dataframe with the balanced data
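# The script above is truncated after the last comment. Below is a minimal
# sketch of the likely next step, assuming the standard resample-and-concat
# upsampling pattern; the variable name `balanced` is ours, not from the
# original source.
balanced = pandas.concat([major, minor_up_sample])

# Shuffle so the duplicated minority rows are not grouped together
balanced = balanced.sample(frac=1).reset_index(drop=True)

# Re-check the class balance after upsampling
print(balanced.Fusao_de_Vertebras.value_counts())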
import numpy as np

import MachineLearning as ml

# A 2-input, 9-hidden, 1-output network
test = ml.NeuralNetwork(2, 9, 1)

# Train the model to learn XOR
for i in range(0, 1000):
    test.train(np.array([[1], [0]]), np.array([[1]]))
    test.train(np.array([[0], [1]]), np.array([[1]]))
    test.train(np.array([[1], [1]]), np.array([[0]]))
    test.train(np.array([[0], [0]]), np.array([[0]]))

print("IH weights: " + str(test.weightIH))
print("HO weights: " + str(test.weightHO))
print("Bias H: " + str(test.biasH))
print("Bias O: " + str(test.biasO))
print("\n\n")

# Test the model on all four XOR inputs
print(test.feedforward(np.array([[1], [0]])))
print(test.feedforward(np.array([[0], [1]])))
print(test.feedforward(np.array([[1], [1]])))
print(test.feedforward(np.array([[0], [0]])))
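
# The MachineLearning.NeuralNetwork class is not included in this dump. Below
# is a minimal sketch of a compatible implementation, assuming a sigmoid
# activation and plain gradient-descent backpropagation. The attribute and
# method names (weightIH, weightHO, biasH, biasO, train, feedforward) mirror
# the usage above; everything else is our assumption, not the author's code.
import numpy as np


def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))


class NeuralNetwork:
    def __init__(self, n_input, n_hidden, n_output, lr=0.1):
        self.lr = lr
        # Weight matrices: input -> hidden and hidden -> output
        self.weightIH = np.random.randn(n_hidden, n_input)
        self.weightHO = np.random.randn(n_output, n_hidden)
        self.biasH = np.zeros((n_hidden, 1))
        self.biasO = np.zeros((n_output, 1))

    def feedforward(self, x):
        # Forward pass; the hidden activation is cached for backprop
        self.hidden = sigmoid(self.weightIH @ x + self.biasH)
        return sigmoid(self.weightHO @ self.hidden + self.biasO)

    def train(self, x, y):
        out = self.feedforward(x)
        # Output-layer delta (sigmoid derivative is out * (1 - out))
        delta_o = (out - y) * out * (1 - out)
        # Backpropagate the error to the hidden layer
        delta_h = (self.weightHO.T @ delta_o) * self.hidden * (1 - self.hidden)
        # Gradient-descent weight and bias updates
        self.weightHO -= self.lr * (delta_o @ self.hidden.T)
        self.biasO -= self.lr * delta_o
        self.weightIH -= self.lr * (delta_h @ x.T)
        self.biasH -= self.lr * delta_h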