def load_images(self):
    """
    Load visualization samples.

    :return next_element_train (generator): iterator over images and labels.
    """
    loader = LoadData(self.batch_size, self.sess)
    next_element_train = loader.get_data(self.train_files)
    return next_element_train
def Caculate(self, sess, files, accuracy_tensor, Cost_tensor, data_placeholder,
             labels_placeholder, rate_placeholder, model='train'):
    """
    Calculate mean accuracy and loss over a dataset.

    :param sess: TensorFlow session.
    :param files (ndarray): data files to score.
    :param accuracy_tensor (tensor): accuracy op in the graph.
    :param Cost_tensor (tensor): cost op in the graph.
    :param data_placeholder (tensor): data placeholder.
    :param labels_placeholder (tensor): labels placeholder.
    :param rate_placeholder (tensor): dropout-rate placeholder.
    :param model (string): either 'train' or 'test', selects which split to score.
    :return acc_mean, loss_mean (float): mean accuracy and mean loss.
    """
    if model == 'train':
        N = self.N_train
    else:
        N = self.N_test
    loader = LoadData(self.batch_size, sess)
    next_element_ = loader.get_data(files)
    acc, loss, count = 0, 0, 1
    while 1:
        try:
            images, labels = sess.run(next_element_)
            print('Score {}/{} \r'.format(count, N), end='', flush=True)
            count += 1
        except tf.errors.OutOfRangeError:
            break
        else:
            acc_, loss_ = sess.run(
                [accuracy_tensor, Cost_tensor],
                feed_dict={
                    data_placeholder: images,
                    labels_placeholder: labels,
                    rate_placeholder: 0  # dropout disabled when scoring
                })
            acc += acc_
            loss += loss_
    # count starts at 1 and is advanced once past the last batch, so divide by
    # count - 1 (guarded against an empty dataset).
    n_batches = max(count - 1, 1)
    acc_mean = acc / n_batches
    loss_mean = loss / n_batches
    return acc_mean, loss_mean
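# A standalone, hedged illustration (not part of the original class) of the
# averaging Caculate performs: with equally sized batches, the mean of the
# per-batch accuracies equals the accuracy over the whole split.
batch_acc = [0.8, 0.9, 1.0]              # per-batch accuracies from three equal batches
print(sum(batch_acc) / len(batch_acc))   # overall accuracy: 0.9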
class Inspection():
    def __init__(self, ori_dataset):
        self.ori_dataset = ori_dataset
        self.ld = LoadData()
        self.er = 0
        self.gi = 0

    # majority vote: return the most frequent label (ties go to label[1])
    def majority_vote(self, dataset):
        count1 = 0
        label = self.ld.get_value(dataset, -1)
        for row in dataset:
            if row[-1] == label[0]:
                count1 += 1
        count2 = len(dataset) - count1
        if count1 > count2:
            return label[0]
        elif count2 > count1:
            return label[1]
        elif count2 == 0:
            return label[0]
        else:
            return label[1]

    # error rate of predicting the majority label for every row
    def error_rate(self, dataset):
        label = self.majority_vote(dataset)
        count = 0
        for row in dataset:
            if row[-1] != label:
                count += 1
        self.er = count / len(dataset)
        return self.er

    # Gini impurity: for binary labels, 1 - p1^2 - p2^2 == 2 * p1 * p2
    def gini_impurity(self, dataset):
        if len(dataset) == 0:
            self.gi = 0
        else:
            count1 = 0
            for item in dataset:
                if item[-1] == dataset[0][-1]:
                    count1 += 1
            count2 = len(dataset) - count1
            self.gi = 2 * (count1 / len(dataset)) * (count2 / len(dataset))
        return self.gi

    # evaluate with error_rate and gini_impurity
    def evaluate(self):
        err_rate = self.error_rate(self.ori_dataset)
        gini_impurity = self.gini_impurity(self.ori_dataset)
        return err_rate, gini_impurity
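# A small, self-contained sketch (not from the original repo) illustrating the
# arithmetic Inspection performs: with labels ['yes', 'yes', 'yes', 'no'], the
# majority vote is 'yes', the error rate is 1/4 = 0.25, and the Gini impurity
# is 2 * (3/4) * (1/4) = 0.375.
labels = ['yes', 'yes', 'yes', 'no']
p_yes = labels.count('yes') / len(labels)
p_no = 1 - p_yes
print(min(p_yes, p_no))   # error rate of the majority-vote classifier: 0.25
print(2 * p_yes * p_no)   # Gini impurity: 0.375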
class Grafo:
    def __init__(self, nodo, aristas):
        self.vertices = nodo
        self.aristas = aristas
        self.grafo = nx.Graph()  # undirected graph
        # self.gradoD = nx.DiGraph()
        self.gradoD = nx.MultiDiGraph()
        self.objLoadData = LoadData()

    def existeNodo(self, nodo):
        if nodo in self.vertices:
            return True
        return False

    def getGrafo(self):
        return self.gradoD

    def addVertex(self, nodo):
        pass

    def addArista(self):
        # add an edge from each base to every destination airport it connects to
        for base in self.aristas:
            aux = self.aristas.get(base)
            print(base)
            consultaColum = self.objLoadData.uniqueColumn(' airport_arr ', aux)
            for destino in consultaColum:
                self.gradoD.add_edge(base, destino)
                print(base, destino)
        print(len(self.gradoD.edges))
        print(self.gradoD.edges)

    def addNode(self):
        nodoBases = [' BASE1 ', ' BASE2 ', 'BASE3 ']
        for nodo in self.vertices:
            if nodo in nodoBases:
                self.gradoD.add_node(nodo)

    def drawGraph(self):
        self.gradoD.add_nodes_from(self.vertices)
        self.addArista()
        pos = nx.spring_layout(self.gradoD)
        plt.figure()
        nx.draw(self.gradoD, pos, edge_color='black', width=1, linewidths=2,
                node_size=1500, node_color='purple', alpha=0.7, ax=None)
        nx.draw_networkx_labels(self.gradoD, pos,
                                labels={node: node for node in self.gradoD.nodes()},
                                font_size=9, alpha=1,
                                horizontalalignment='center',
                                verticalalignment='center', ax=None)
        plt.show()
def __init__(self, col_index_used, col_index_touse, dataset, head,
             left=None, right=None, depth=0):
    self.left = left
    self.right = right
    self.message = ''
    self.col_index_touse = col_index_touse
    self.col_index_used = col_index_used
    self.attribute = head[col_index_used]  # the attribute to divide on
    self.dataset = dataset                 # the dataset divided by that attribute
    self.depth = depth                     # current depth of the node
    self.ld = LoadData()
    self.ins = Inspection(self.dataset)
    self.result = self.ins.majority_vote(dataset)  # majority vote for each node
class Vertex:
    def __init__(self):
        self.objloadata = LoadData()

    def recorrerDataFrame(self, dataF):
        for indiceF, fila in dataF.iterrows():
            print(fila)

    def recorrerDictionary(self, clave):
        self.objloadata.upLoadData()
        listaAdyancen = self.objloadata.getBasesConexiones()
        print(listaAdyancen.keys())
        aux = pd.DataFrame()
        if clave in listaAdyancen:
            aux = listaAdyancen.get(clave)
            print(type(aux))
            self.recorrerDataFrame(aux)
            print('*******')
        else:
            print('Base or airport does not exist')
    def addNode(self):
        nodoBases = [' BASE1 ', ' BASE2 ', 'BASE3 ']
        for nodo in self.vertices:
            if nodo in nodoBases:
                self.gradoD.add_node(nodo)

    def drawGraph(self):
        self.gradoD.add_nodes_from(self.vertices)
        self.addArista()
        pos = nx.spring_layout(self.gradoD)
        plt.figure()
        nx.draw(self.gradoD, pos, edge_color='black', width=1, linewidths=2,
                node_size=1500, node_color='purple', alpha=0.7, ax=None)
        nx.draw_networkx_labels(self.gradoD, pos,
                                labels={node: node for node in self.gradoD.nodes()},
                                font_size=9, alpha=1,
                                horizontalalignment='center',
                                verticalalignment='center', ax=None)
        plt.show()


objload = LoadData()
objload.upLoadData()
print(objload.getBasesConexiones())
objgrafo = Grafo(objload.getDataAirport(), objload.getBasesConexiones())
objgrafo.drawGraph()
from flask import Flask, request
from datetime import datetime
import re
from loadData import LoadData
from flask import Response
from flask_cors import CORS

app = Flask(__name__)
CORS(app)
loadData = LoadData()


@app.route("/", methods=['POST', 'GET'])
def home():
    stockStatementURI = 'C:\\Users\\Ashutosh\\workspace\\BusinessAnalytics\\local\\BusinessAnalyticsPy\\fwdlezarstock\\STOCK-3010.csv'
    skipRows = 2
    stockStatement = loadData.loadCsvData(stockStatementURI, skipRows)
    stockStatementDF = loadData.convertCSVToDataFrame(stockStatement)
    product = stockStatementDF['Product Name']
    cost = stockStatementDF['Purchase Price'] * \
        stockStatementDF['Current Stock']
    # Create a new array with the X axis being the first column (products)
    # and the Y axis being the second column (cost).
    data = loadData.convertToJsonObject(
        ["products", "cost"], [product.to_json(), cost.to_json()])
    product.to_json()
    # data = '{"products":' + product.to_json() + ',"cost":' + cost.to_json() + "}"
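    # The snippet is cut off here. A minimal, assumed continuation (not part of
    # the original code): serve the JSON string built above through a Flask
    # Response so the CORS-enabled frontend can consume it.
    return Response(data, mimetype='application/json')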
def prepareAndReadData(self):
    '''
    Load the data from the onefile into memory, with train/val/test chunks.
    '''
    loader = LoadData(self.args)
    return loader.run()
def inicializar(self):
    objload = LoadData()
import matplotlib.pyplot as plt
import numpy as np
from loadData import LoadData
from decisionTree import Node, DecisionTree, Evaluate
from inspection import Inspection

if __name__ == '__main__':
    train_input = '../handout/education_train.tsv'
    test_input = '../handout/education_test.tsv'
    train_output = '../result/education_train.labels'
    test_output = '../result/education_test.labels'
    ld = LoadData()
    dataset = ld.load_data(train_input)
    dt = DecisionTree(ld)
    tr_err = []
    te_err = []
    x_arr = []
    print(ld.head)
    for i in range(len(ld.head)):
        root = dt.construct(dataset, i)
        # dt.traverse(root)
        dt.classify(ld.load_data(train_input), root, train_output)
        dt.classify(ld.load_data(test_input), root, test_output)
        with open(train_output, 'r') as f:
            predcol = f.read().splitlines()
        realcol = np.loadtxt(train_input, dtype=str, delimiter='\t',
                             skiprows=1)[:, -1]
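        # The snippet ends here; the lines below are an assumed sketch (not
        # from the original source) of how the per-depth training error could
        # be accumulated for plotting: compare the predicted labels against the
        # true labels in the last column and store the misclassification rate.
        tr_err.append(np.mean(np.array(predcol) != realcol))
        x_arr.append(i)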
from loadData import LoadData

if __name__ == "__main__":
    ld = LoadData()
    # populate tables and update attr after getting all uniprot ids using R
    ld.getBiogridIDFromUniprot()
    ld.loadProteinDataBankTable()

    # taken from protein atlas
    # populate after getting ensembl ids using R
    ld.importCSV("./origExcel/subcellular_location.csv")
    ld.addProteinAtlasLocalization()

    # generating stats for typeahead search
    ld.importCSV("./origExcel/csvMutCollection.csv")
    ld.exportOneColtoJSON('Gene', 'HUGO_GENE_SYMBOL')
    ld.exportOneColtoJSON('Variant', 'MUT_HGVS_NT_ID')
    ld.exportOneColtoJSON('VariantProperty', 'CLINVAR_CLINICAL_SIGNIFICANCE')
    ld.exportOneColtoJSON('VariantProperty', 'PFAM_PRESENT')
    ld.exportOneColtoJSON('VariantProperty', 'IN_PFAM')
    ld.exportOneColtoJSON('VariantProperty', 'IN_MOTIF')
    ld.exportOneColtoJSON('Variant', 'CCSB_MUTATION_ID')
    ld.exportOneColtoJSON('Variant', 'DBSNP_ID')
    ld.exportOneColtoJSON('Variant', 'CHR_COORDINATE_HG19')
    ld.exportOneColtoJSON('Variant', 'MUT_HGVS_AA_ID')
    ld.exportOneColtoJSON('VariantProperty', 'HGMD_VARIANT_CLASS')
    ld.exportOneColtoJSON('Gene', 'OMIM_ID')
    ld.exportOneColtoJSON('Gene', 'ENTREZ_GENE_ID')
    ld.exportOneColtoJSON('Gene', 'UNIPROT_SWISSPROT_ID')
    ld.exportOneColtoJSON('Gene', 'CHROMOSOME_NAME')
nameSim = 'validation.mat'
flagSave = True
nameSavedFile = 'linValidation'
format = 'eps'

# Retrieve data for the 3 buildings and save them in separate dataframes
nameBui = ['old', 'reno', 'renoLight']
leg = ['Original', 'Renovated', 'Light weight']
listDic = [{}, {}, {}]
listDf = []
nZones = 6
for j, s in enumerate(nameBui):
    for i in range(nZones):
        listDic[j]['Zone%d' % (i + 1)] = '%s.error[%d]' % (s, (i + 1))
    listDf.append(LoadData(path + nameSim, listDic[j]).df)

# Make boxplots from the dataframes
f, axarr = plt.subplots(1, len(nameBui))
for j, s in enumerate(leg):
    listDf[j].boxplot(ax=axarr[j])
    axarr[j].set_title(s)
    axarr[j].set_ylim(-1, 1)
    # rotate xticklabels
    for tick in axarr[j].get_xticklabels():
        tick.set_rotation(45)
    if j == 0:
        axarr[j].set_ylabel('[K]')

if flagSave:
    f.savefig(nameSavedFile + "." + format, format=format, bbox_inches='tight')
class DecisionTree():
    def __init__(self, ori_dataset, max_depth):
        self.col = []
        self.max_depth = max_depth
        self.dataset = ori_dataset
        self.ld = LoadData()
        self.ins = Inspection(ori_dataset)

    # divide the dataset on the given attribute (binary split on its two values)
    def divide_dataset(self, dataset, col_index):
        label = self.ld.get_value(dataset, col_index)
        dataset0 = []
        dataset1 = []
        for row in dataset:
            if row[col_index] == label[0]:
                dataset0.append(row)
            else:
                dataset1.append(row)
        dataset0 = np.array(dataset0)
        dataset1 = np.array(dataset1)
        return dataset0, dataset1

    # calculate the Gini impurity, optionally after splitting on an attribute
    def gini_impurity(self, dataset, col_index=-1):
        if col_index == -1:
            gi = self.ins.gini_impurity(dataset)
        else:
            ds = self.divide_dataset(dataset, col_index)
            gi_left = self.ins.gini_impurity(ds[0])
            gi_right = self.ins.gini_impurity(ds[1])
            # weighted average of the two branches' impurities
            gi = (len(ds[0]) / len(dataset)) * gi_left + \
                 (len(ds[1]) / len(dataset)) * gi_right
        return gi

    # calculate the Gini gain of splitting on the given attribute
    def gini_gain(self, dataset, col_index):
        ori_gi = self.gini_impurity(dataset)
        new_gi = self.gini_impurity(dataset, col_index)
        gg = ori_gi - new_gi
        return gg

    # pick the unused attribute with the largest Gini gain
    def get_attribute(self, dataset, used_col):
        gg_arr = {}
        col_arr = [i for i in range(len(dataset[0]) - 1)]
        for item in list(set(col_arr).difference(set(used_col))):
            gg_arr[item] = self.gini_gain(dataset, item)
        col_index = max(gg_arr, key=gg_arr.get)
        return col_index

    # record the path of used columns, then recurse
    def construct(self, dataset, col_index=-1, depth=0):
        # reached the max depth
        if depth > self.max_depth:
            print('depth reach max depth')
            return None
        # after the split the dataset is empty
        elif len(dataset) == 0:
            print('dataset is empty.')
            return None
        # no more attributes to divide on
        elif len(dataset[0]) == len(self.col) + 1:
            print('all the attributes have been used.')
            return None
        # after the split the Gini impurity of the dataset is 0
        elif self.gini_impurity(dataset) == 0:
            print('no need to do more division!')
            return None
        # recursively construct the left and right nodes
        else:
            col_index = self.get_attribute(dataset, self.col)
            # construct the current node
            node = Node(col_index, dataset, depth=depth)
            self.col.append(col_index)
            # divide the dataset according to the max Gini gain
            new_dataset = self.divide_dataset(dataset, col_index)
            depth += 1
            # recurse on the left branch
            left_branch = self.construct(new_dataset[0], col_index, depth)
            node.left = left_branch
            # recurse on the right branch
            right_branch = self.construct(new_dataset[1], col_index, depth)
            node.right = right_branch
            self.col.remove(col_index)
            return node

    def traverse(self, node):
        if node:
            print(node.depth, '\t', node.attribute)
            self.traverse(node.left)
            self.traverse(node.right)
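# A hedged worked example of the Gini gain computed above (a standalone sketch,
# not part of the original classes). For 8 rows with labels [5 x A, 3 x B], the
# parent impurity is 2*(5/8)*(3/8) = 0.46875. If an attribute splits the rows
# into [4 x A] and [1 x A, 3 x B], the weighted child impurity is
# (4/8)*0 + (4/8)*2*(1/4)*(3/4) = 0.1875, so the Gini gain is 0.28125.
def gini(pos, neg):
    n = pos + neg
    return 0 if n == 0 else 2 * (pos / n) * (neg / n)

parent = gini(5, 3)                                      # 0.46875
children = (4 / 8) * gini(4, 0) + (4 / 8) * gini(1, 3)   # 0.1875
print(parent - children)                                 # 0.28125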
            right_branch = self.construct(new_dataset[1], col_index, depth)
            node.right = right_branch
            self.col.remove(col_index)
            return node

    def traverse(self, node):
        if node:
            print(node.depth, '\t', node.attribute)
            self.traverse(node.left)
            self.traverse(node.right)


if __name__ == '__main__':
    ld = LoadData()
    dataset = ld.load_data('../handout/small_train.tsv')
    dt = DecisionTree(dataset, 0)
    ds = dt.divide_dataset(dataset, 1)
    # gini = dt.gini_impurity(dataset, 1)
    giga = dt.gini_gain(dataset, 1)
    # col = dt.get_attribute(dataset)
    root = dt.construct(dataset)
    dt.traverse(root)
def fit(self, lr, epochs, drop_rate):
    """
    Fit the model.

    :param lr (float): learning rate.
    :param epochs (int): number of epochs to iterate.
    :param drop_rate (float): dropout rate. rate = 1 - keep_prob.
    :return:
    """
    # create placeholders
    data = tf.placeholder(tf.float32, [None, 227, 227, 3], name='Input')
    labels = tf.placeholder(tf.float32, [None, 1], name='Labels')
    rate = tf.placeholder(tf.float32, name='rate')
    # build the model
    params = self.init_parameters()
    out = self.forward(data, params, rate)
    Cost = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=out, labels=labels))
    optimizer = tf.train.RMSPropOptimizer(learning_rate=lr).minimize(Cost)
    # scoring ops
    predict = tf.round(tf.sigmoid(out))
    equal = tf.equal(labels, predict)
    correct = tf.cast(equal, tf.float32)
    accuracy = tf.reduce_mean(correct)
    # split the data into train and test files
    split_data = SplitData(self.file_dir, Load_samples=self.Load_samples,
                           test_rate=self.test_rate)
    train_files, test_files = split_data()
    self.N_train = len(train_files) // self.batch_size
    self.N_test = len(test_files) // self.batch_size
    # saver
    saver = tf.train.Saver()
    tf.add_to_collection('pre_network', out)
    init = tf.global_variables_initializer()
    # training
    with tf.Session() as sess:
        sess.run(init)
        for epoch in range(epochs):
            loader = LoadData(self.batch_size, sess)
            next_element_train = loader.get_data(train_files)
            # run through the whole training set
            count = 1
            while 1:
                try:
                    images, target = sess.run(next_element_train)
                    print('Training {}/{} \r'.format(count, self.N_train),
                          end='', flush=True)
                    count += 1
                except tf.errors.OutOfRangeError:
                    break
                else:
                    _ = sess.run(optimizer,
                                 feed_dict={
                                     data: images,
                                     labels: target,
                                     rate: drop_rate
                                 })
            acc_train, loss_train = self.Caculate(sess, train_files, accuracy,
                                                  Cost, data, labels, rate,
                                                  'train')
            acc_test, loss_test = self.Caculate(sess, test_files, accuracy,
                                                Cost, data, labels, rate,
                                                'test')
            print('[{}/{}] train loss:{:.4f} - train acc:{:.4f} - '
                  'test loss:{:.4f} - test acc:{:.4f}'.format(
                      epoch + 1, epochs, loss_train, acc_train,
                      loss_test, acc_test))
            if acc_train >= 0.980:
                break
        # save the trained model
        saver.save(sess, 'model/alexNet')
            self.gi = 0
        else:
            count1 = 0
            for item in dataset:
                if item[-1] == dataset[0][-1]:
                    count1 += 1
            count2 = len(dataset) - count1
            # binary Gini impurity: 2 * p1 * p2
            self.gi = (count1 / len(dataset)) * (count2 / len(dataset)) + \
                      (count2 / len(dataset)) * (count1 / len(dataset))
        return self.gi

    # evaluate with error_rate and gini_impurity
    def evaluate(self):
        err_rate = self.error_rate(self.ori_dataset)
        gini_impurity = self.gini_impurity(self.ori_dataset)
        return err_rate, gini_impurity


if __name__ == '__main__':
    infile = sys.argv[1]
    outfile = sys.argv[2]
    ld = LoadData()
    ori_dataset = ld.load_data(infile)
    ins = Inspection(ori_dataset)
    eva = ins.evaluate()
    err_rate = eva[0]
    gini_impurity = eva[1]
    with open(outfile, 'w') as f:
        f.writelines("gini_impurity: {}\n".format(gini_impurity))
        f.writelines("error: {}\n".format(err_rate))
    # 'webpack': 'https://github.com/qq20004604/webpack-study',
    # 'react': 'https://github.com/qq20004604/react-demo',
    # 'vue': 'github: https://github.com/qq20004604/vue-scaffold\nBlog column (1.x): https://blog.csdn.net/qq20004604/article/category/6381182',
    # '笔记': 'https://github.com/qq20004604/notes',
    # 'demo': 'https://github.com/qq20004604/some_demo',
    # '海外服务器': 'https://manage.hostdare.com/aff.php?aff=939\nThis works as a private server (no ICP filing required); you can also ask the group owner how to set up an SS server.',
    # 'QQ 机器人': 'https://github.com/qq20004604/qq-robot',
    # '架构': 'https://juejin.im/post/5cea1f705188250640005472',
    # 'es6': 'https://blog.csdn.net/qq20004604/article/details/78014684',
    # 'vue脚手架': 'https://github.com/qq20004604/Vue-with-webpack',
    # 'react脚手架': 'https://github.com/qq20004604/react-with-webpack',
    # 'Macbook常用软件': 'https://github.com/qq20004604/when-you-get-new-Macbook',
    # 'python的django与mysql交互': 'https://blog.csdn.net/qq20004604/article/details/89934212'
}

ld = LoadData()


def log(context, filename='./log.log'):
    with open(filename, 'a', encoding='utf-8') as f:
        f.write(
            'time:%s, sender:%s, message_type:%s, user_id:%s, content:%s\n'
            % (datetime.now(), context['sender']['nickname'],
               context['message_type'], context['sender']['user_id'],
               context['raw_message']))


@bot.on_message()
async def handle_msg(context):
    msg = context['message']
    # print(msg)
def hormigas():
    objload = LoadData()
    aux = objload.getBasesConexiones()
    print(aux)
    viajar()