def test(args):
    inference_graph = tf.Graph()
    with tf.Session(graph=inference_graph) as sess:
        if not args.restore_path or not args.vars_file:
            print('\n\n\tSpecify a restore_path: --restore_path=<path_to_ckpt> and --vars_file=<vars_file_pathname>\n\n')
            quit()
        trained_model_saver, w, b = restore_graph(sess, args)
        _y_ = inference_graph.get_tensor_by_name('y_:0')
        _loss = inference_graph.get_tensor_by_name('loss:0')
        _x = inference_graph.get_tensor_by_name('x:0')
        _y = inference_graph.get_tensor_by_name('y:0')
        while True:
            w, b, train_input, train_output = gen_data(int(args.batch_size), int(args.num_inputs), w, b)
            y_ = sess.run(_y_, feed_dict={_x: train_input, _y: train_output})
            loss = sess.run(_loss, feed_dict={_x: train_input, _y: train_output})
            y_acc = y_
            print('Mean Squared Error Loss: %.2f\n' % loss)
            print(train_output)
            print('\n')
            print(y_acc)
            print('\n')
            input('Press Enter to continue...')
def test(n_rows: int, n_partitions: int): """对比collect_dict_1函数和collect_dict_2函数的性能。 collect_dict_1使用DataFrame的collect方法再转为Dict, collect_dict_2使用Rdd的collect方法再转为Dict, 结论:两者差异不明显 *************************************************************************** 运行时间 --------------------------------------------------------------------------- 100,000条数据,10个分区 函数collect_dict_1运行--100次! 平均值为--0.006 s! 中位数为--0.006 s! 最小值为--0.005 s! 最大值为--0.010 s! 函数collect_dict_2运行--100次! 平均值为--0.007 s! 中位数为--0.007 s! 最小值为--0.006 s! 最大值为--0.010 s! --------------------------------------------------------------------------- 1,000,000条数据, 100个分区 函数collect_dict_1运行--100次! 平均值为--0.053 s! 中位数为--0.052 s! 最小值为--0.047 s! 最大值为--0.139 s! 函数collect_dict_2运行--100次! 平均值为--0.056 s! 中位数为--0.056 s! 最小值为--0.051 s! 最大值为--0.062 s! --------------------------------------------------------------------------- *************************************************************************** *************************************************************************** 占用内存 --------------------------------------------------------------------------- 函数collect_dict_1把DataFrame全部加载到内存中再转换为Dict, 最大内存消耗 = DataFrame + Dict + 容器引用的对象 函数collect_dict_2把Rdd reduce为Dict, 最大内存消耗 = Dict + 容器引用的对象 --------------------------------------------------------------------------- *************************************************************************** Arguments: n_rows {int} -- 随机生成的数据行数。 n_partitions {int} -- 随机生成的数据分区数。 """ print("对比collect_dict_1函数和collect_dict_2函数的性能...") print("共%d条数据,%d个分区!" % (n_rows, n_partitions)) spark = SparkSession.builder.appName("gen_data_test").getOrCreate() data = gen_data(spark, n_rows=n_rows).repartition(n_partitions).cache() data.show() collect_dict_1(data) collect_dict_2(data)
def openFrame(self):
    self.data = gen_data.gen_data("review1")
    self.words_num = self.word_num.get()
    self.root.withdraw()
    if not self.words_num:
        # Prompt: "please enter a valid number of words"
        messagebox.showinfo(title='提示', message='请输入正确的单词数!')
    else:
        self.reviewFrame = tk.Toplevel()
        self.reviewFrame.geometry("500x450")
        self.reviewFrame.title("review")
        self.data.start(int(self.words_num))
        # "You have mastered <N> words"
        tk.Label(self.reviewFrame, text="您已掌握了", font=('楷体', 12)).place(x=90, y=10, anchor=tk.NW)
        tk.Label(self.reviewFrame, textvariable=self.data.master_words, font=('Times New Roman', 12)).place(x=200, y=10, anchor=tk.NW)
        tk.Label(self.reviewFrame, text="个单词", font=('楷体', 12)).place(x=320, y=10, anchor=tk.NW)
        # "There are <N> words left"
        tk.Label(self.reviewFrame, text="现还剩", font=('楷体', 12)).place(x=90, y=30, anchor=tk.NW)
        tk.Label(self.reviewFrame, textvariable=self.data.left_words, font=('Times New Roman', 12)).place(x=200, y=30, anchor=tk.NW)
        tk.Label(self.reviewFrame, text="个单词", font=('楷体', 12)).place(x=320, y=30, anchor=tk.NW)
        tk.Label(self.reviewFrame, textvariable=self.data.word_text, font=('Times New Roman', 20)).place(x=200, y=60, anchor=tk.NW)
        checkbox = tk.Checkbutton(self.reviewFrame, text="show sentences", variable=self.data.status, command=self.data.show_sentences)
        checkbox.place(x=370, y=90, anchor=tk.NW)
        tk.Label(self.reviewFrame, textvariable=self.data.sentences, wraplength=400, font=('Times New Roman', 10)).place(x=50, y=120, anchor=tk.NW)
        self.data.gen_choices()
        button1 = tk.Button(self.reviewFrame, textvariable=self.data.answer1, height=1, width=25)
        button1.place(x=150, y=220, anchor=tk.NW)
        button2 = tk.Button(self.reviewFrame, textvariable=self.data.answer2, height=1, width=25)
        button2.place(x=150, y=270, anchor=tk.NW)
        button3 = tk.Button(self.reviewFrame, textvariable=self.data.answer3, height=1, width=25)
        button3.place(x=150, y=320, anchor=tk.NW)
        button4 = tk.Button(self.reviewFrame, textvariable=self.data.answer4, height=1, width=25)
        button4.place(x=150, y=370, anchor=tk.NW)
        tk.Label(self.reviewFrame, textvariable=self.data.response, font=('Times New Roman', 12)).place(x=50, y=400, anchor=tk.NW)
        button5 = tk.Button(self.reviewFrame, text="Next")
        button5.place(x=350, y=420, anchor=tk.NW)
        button6 = tk.Button(self.reviewFrame, text="Exit")
        button6.place(x=400, y=420, anchor=tk.NW)
        self.reviewFrame.bind('<a>', self.check_answer1)
        self.reviewFrame.bind('<b>', self.check_answer2)
        self.reviewFrame.bind('<c>', self.check_answer3)
        self.reviewFrame.bind('<d>', self.check_answer4)
        self.reviewFrame.bind('<n>', self.review_next1)
        self.reviewFrame.bind('<q>', self.save_and_quit)
        button1.bind('<Button-1>', self.check_answer1)
        button2.bind('<Button-1>', self.check_answer2)
        button3.bind('<Button-1>', self.check_answer3)
        button4.bind('<Button-1>', self.check_answer4)
        button5.bind('<Button-1>', self.review_next1)
        button6.bind('<Button-1>', self.save_and_quit)
        self.buttonlist1 = [button1, button2, button3, button4]
        self.buttonlist2 = [button5]
def test(n_rows: int, n_partitions: int): """对比collect_set_1函数和collect_set_2函数的性能。 结论:函数collect_dict_1更快。 *************************************************************************** 运行时间 --------------------------------------------------------------------------- 100,000条数据,10个分区 函数collect_dict_1运行--100次! 平均值为--0.017 s! 中位数为--0.016 s! 最小值为--0.011 s! 最大值为--0.038 s! 函数collect_dict_2运行--100次! 平均值为--0.034 s! 中位数为--0.032 s! 最小值为--0.027 s! 最大值为--0.068 s! --------------------------------------------------------------------------- 1,000,000条数据, 100个分区 函数collect_dict_1运行--100次! 平均值为--0.021 s! 中位数为--0.021 s! 最小值为--0.018 s! 最大值为--0.029 s! 函数collect_dict_2运行--100次! 平均值为--0.069 s! 中位数为--0.068 s! 最小值为--0.062 s! 最大值为--0.093 s! --------------------------------------------------------------------------- *************************************************************************** Arguments: n_rows {int} -- 随机生成的数据行数。 n_partitions {int} -- 随机生成的数据分区数。 """ print("对比collect_dict_1函数和collect_dict_2函数的性能...") print("共%d条数据,%d个分区!" % (n_rows, n_partitions)) spark = SparkSession.builder.appName("gen_data_test").getOrCreate() # 产生重复数据 n_duplicated = 10 data = gen_data(spark, n_rows // n_duplicated) for _ in range(n_duplicated): data.union(data) data = data.repartition(n_partitions).cache() data.show() collect_set_1(data) collect_set_2(data)
def run(Phi, ndims, theta, N, seed):
    phi = Phi(torch.tensor(theta))
    id = get_info()
    s, log_ll = gen_data(phi, ndims, N, seed)
    print('avg_log_likelihood', torch.mean(log_ll))
    import matplotlib.pyplot as plt
    plt.scatter(s.detach().numpy()[:, 0], s.detach().numpy()[:, 1])
    plt.show()
    d = {'samples': s, 'log_ll': log_ll}
    pickle.dump(d, open('./data/frank%s.p' % id, 'wb'))
    ex.add_artifact('./data/frank%s.p' % id)
def openFrame2(self):
    self.data = gen_data.gen_data("review2")
    self.words_num = self.word_num.get()
    self.root.withdraw()
    if not self.words_num:
        # Prompt: "please enter a valid number of words"
        messagebox.showinfo(title='提示', message='请输入正确的单词数!')
    else:
        self.reviewFrame = tk.Toplevel()
        self.reviewFrame.geometry("500x370")
        self.reviewFrame.title("review")
        self.data.start(int(self.words_num))
        # "You have mastered <N> words" / "There are <N> words left"
        tk.Label(self.reviewFrame, text="您已掌握了", font=('楷体', 12)).place(x=90, y=10, anchor=tk.NW)
        tk.Label(self.reviewFrame, textvariable=self.data.master_words, font=('Times New Roman', 12)).place(x=200, y=10, anchor=tk.NW)
        tk.Label(self.reviewFrame, text="个单词", font=('楷体', 12)).place(x=320, y=10, anchor=tk.NW)
        tk.Label(self.reviewFrame, text="现还剩", font=('楷体', 12)).place(x=90, y=30, anchor=tk.NW)
        tk.Label(self.reviewFrame, textvariable=self.data.left_words, font=('Times New Roman', 12)).place(x=200, y=30, anchor=tk.NW)
        tk.Label(self.reviewFrame, text="个单词", font=('楷体', 12)).place(x=320, y=30, anchor=tk.NW)
        tk.Label(self.reviewFrame, textvariable=self.data.word_text, font=('Times New Roman', 20)).place(x=200, y=60, anchor=tk.NW)
        checkbox = tk.Checkbutton(self.reviewFrame, text="show sentences", variable=self.data.status, command=self.data.show_sentences)
        checkbox.place(x=370, y=90, anchor=tk.NW)
        tk.Label(self.reviewFrame, textvariable=self.data.sentences, wraplength=400, font=('Times New Roman', 10)).place(x=50, y=120, anchor=tk.NW)
        self.data.gen_choices()
        self.Myanswer = tk.StringVar()
        self.myanswer = tk.Entry(self.reviewFrame, textvariable=self.Myanswer, font=('Times New Roman', 12), width=50, fg='black')
        self.myanswer.place(x=30, y=220, anchor=tk.NW)
        tk.Label(self.reviewFrame, textvariable=self.data.response, font=('Times New Roman', 12)).place(x=50, y=250, anchor=tk.NW)
        button1 = tk.Button(self.reviewFrame, text="Confirm")
        button1.place(x=120, y=280, anchor=tk.NW)
        button2 = tk.Button(self.reviewFrame, text="Add", command=self.add_trans)
        button2.place(x=300, y=280, anchor=tk.NW)
        button3 = tk.Button(self.reviewFrame, text="Next")
        button3.place(x=350, y=330, anchor=tk.NW)
        button4 = tk.Button(self.reviewFrame, text="Exit")
        button4.place(x=400, y=330, anchor=tk.NW)
        self.reviewFrame.bind('<y>', self.check_my)
        self.reviewFrame.bind('<n>', self.review_next2)
        self.reviewFrame.bind('<q>', self.save_and_quit)
        button1.bind('<Button-1>', self.check_my)
        button3.bind('<Button-1>', self.review_next2)
        button4.bind('<Button-1>', self.save_and_quit)
def main(): print "Building network ..." l_out = build_network(N_BATCH) read_model_data(l_out, 'lstm_iter_60000') print "Done building network" target_values = T.tensor3('target_output') input_values = T.tensor3('input') network_output = lasagne.layers.get_output(l_out, input_values) # categorical crossentropy loss because it's the proper way cost = T.mean(categorical_crossentropy(T.reshape(network_output, (N_BATCH*MAX_LENGTH, N_FEAT_DIM)) , T.reshape(target_values, (N_BATCH*MAX_LENGTH, N_FEAT_DIM)))) all_params = lasagne.layers.get_all_params(l_out) print "Computing updates..." updates = lasagne.updates.adagrad(cost, all_params, LEARNING_RATE) print "Compiling functions..." train = theano.function( [input_values, target_values], cost, updates=updates) compute_cost = theano.function([input_values, target_values], cost) train_f = open('chatlog.txt','r') f_data = train_f.read() print "Training ..." try: for n in xrange(N_ITERATIONS): X, Y = gen_data(f_data, n, N_BATCH, MAX_LENGTH) train(X, Y) if not n % CHECK_FREQUENCY: cost_val = compute_cost(X, Y) print "Iteration {} training cost = {}".format(n, cost_val) if n % CHECKPOINT_FREQUENCY == 0 and n > 0: print "Saving checkpoint..." fname = "lstm_iter_%d" % (n) write_model_data(l_out, fname) except KeyboardInterrupt: pass
def compute(file, fs=0, time_res=0, amp_res=0, fmin=0, fmax=0, fcs=[], nb_filters=0, q=0, n=0,
            filters=[], filters_fq=[], ax=None, plotd=True, dbfs=False, spec_only=False,
            spec_xlim=False, drc_tl=False, drc_th=False, drc_r=False, adc_res=16, formants=[]):
    """
    Runs the whole processing chain on the audio file given as a parameter.

    :param file: Name of the audio file to process, or a list of amplitudes
    :type file: string or number
    :param fs: Sampling frequency (only if a list of amplitudes is given for the file parameter)
    :type fs: number
    :param adc_res: Resolution of the analog-to-digital converter
    :type adc_res: number
    :param drc_tl: Lower threshold of the audio compressor
    :type drc_tl: number
    :param drc_th: Upper threshold of the audio compressor
    :type drc_th: number
    :param drc_r: Compression ratio of the audio compressor
    :type drc_r: number
    :param fmin: Minimum frequency
    :type fmin: number
    :param fmax: Maximum frequency
    :type fmax: number
    :param fcs: List of custom centre frequencies (if given, the fmin, fmax and nb_filters parameters are ignored)
    :type fcs: number[]
    :param nb_filters: Number of filters
    :type nb_filters: number
    :param q: Quality factor
    :type q: number
    :param n: Filter order
    :type n: number
    :param filters: Already generated filter bank (if given, the filter-generation parameters are ignored)
    :type filters: filter[]
    :param filters_fq: List of objects with "fc", "fl" and "fh" giving the characteristic frequencies of the associated filter
    :type filters_fq: object("fc", "fl", "fh")[]
    :param time_res: Temporal resolution
    :type time_res: number
    :param amp_res: Amplitude resolution
    :type amp_res: number
    :param formants: List of formants to draw on the figure ("a", "e", "i", "o", "u")
    :type formants: string[]
    :param ax: Existing drawing surface (a new figure is created if none is given)
    :type ax: figure
    :param plotd: If set, shows the spectrogram of each processed file
    :type plotd: bool
    :param spec_only: If set, shows only the custom spectrogram (in that case, give a title)
    :type spec_only: string
    :param spec_xlim: Overrides the upper limit of the spectrogram's x axis
    :type spec_xlim: number
    :param dbfs: Shows the dBFS spectrum
    :type dbfs: boolean
    :return: List of time segments, list of frequencies and list of energy sequences
    :rtype: number[], number[], number[][]
    """
    # Load the audio file and add the noise files (if given)
    if type(file) == list:
        if type(file[0]) == str:
            fs, y = sw.read(file[0])
            y = np.array(y)
            for i in range(1, len(file)):
                d, noise = sw.read(file[i])
                for j in range(0, min(len(y), len(noise))):
                    y[j] = y[j] + noise[j]
        else:
            y = file
    # Load the audio file
    elif type(file) == str:
        fs, y = sw.read(file)
    else:
        y = file
    N = len(y)
    t = np.linspace(0, N / fs, N)
    # Audio compressor
    if drc_r != False:
        y = drc(y, tl=drc_tl, th=drc_th, ratio=drc_r)
    # Analog-to-digital converter
    if adc_res < 16:
        y = adc(y, adc_res)
    # Filtering
    if (nb_filters > 0) or (len(fcs) > 0):
        filters, filters_fq = gen_filters(q, n, fs, nb_filters=nb_filters, fmin=fmin, fmax=fmax, fcs=fcs)
    filtered = gen_filtered(y, fs, filters)
    # Spectrogram
    rsegs, rfreqs, rseqs = gen_data(filtered, fs, time_res, amp_res, filters_fq)
    # Remove the silence at the beginning of the sample
    rsum = np.sum(rseqs, axis=0)
    for i in range(len(rsum)):
        if rsum[i] != 0:
            break
    if spec_only:
        rsegs = np.delete(rsegs, range(len(rsegs) - i, len(rsegs)))
    else:
        rsegs = np.delete(rsegs, range(0, i))
    rseqs = np.delete(rseqs, range(0, i), 1)
    # Plotting
    if plotd:
        if spec_only:
            plot_datagram(rsegs, rfreqs, rseqs, title=spec_only, xlim=spec_xlim, formants=formants)
        else:
            plot_data(y, t, rsegs, rfreqs, rseqs, ax=ax, xlim=spec_xlim, dbfs=dbfs, formants=formants)
    return rsegs, rfreqs, rseqs
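
# Hedged usage example for compute(); the file name and every parameter value
# below are illustrative assumptions, not values taken from the original project.
if __name__ == "__main__":
    segs, freqs, seqs = compute(
        "voice.wav",      # hypothetical mono WAV file to analyse
        fmin=100,         # lowest centre frequency of the filter bank (Hz)
        fmax=4000,        # highest centre frequency (Hz)
        nb_filters=24,    # number of band-pass filters
        q=5,              # quality factor of each filter
        n=2,              # filter order
        time_res=0.01,    # temporal resolution (s)
        amp_res=1,        # amplitude resolution
        plotd=True)       # show the spectrogram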
epochs = 30
X = x_train
Y = y_train
Z = z_train
for e in range(epochs):
    mu, Sigma1 = E_step(beta_old, sigma_u, sigma_e, X, Y, Z)
    beta_old, sigma_u, sigma_e = M_step(mu, Sigma1, X, Y, Z)
    if e % 10 == 0:
        pred = prediction(X, beta_old, mu, Sigma1, Z)
pred = prediction(x_test, beta_old, mu, Sigma1, z_test)
plt.scatter(x_test[:, 1], pred, marker='x')
print(AccuarcyCompute(pred, y_test))


if __name__ == '__main__':
    Xdata, y = gen_data(10)
    Xdata = np.concatenate((np.ones((Xdata.shape[0], 1)), Xdata), axis=1).astype(float)
    for i in range(len(y)):
        EM(y[i])
'''
(0.002460019249207814, 0.03566158009767635, 4.97)
(0.002691816505975828, 0.03861964516235123, 4.6)
(0.003364610734050656, 0.04351645944560852, 5.58)
(0.002092487267656577, 0.03565267468930575, 5.53)
(0.0017462519262057027, 0.034892449525360095, 3.7)
(0.00247624675649647, 0.03753371018468406, 4.35)
(0.003737213958193634, 0.04866151945218468, 8.92)
(0.0026849614351548, 0.03899822956705998, 4.29)
(0.0013618905932646977, 0.03088042985437457, 3.09)
(0.00444138987523014, 0.05103304217010265, 4.18)
'''
import gen_data
import time
import glob

if __name__ == "__main__":
    array_folder = ["training/a/*.png", "training/b/*.png", "training/c/*.png", "training/d/*.png", "training/e/*.png",
                    "training/f/*.png", "training/g/*.png", "training/h/*.png", "training/i/*.png", "training/k/*.png",
                    "training/l/*.png", "training/m/*.png", "training/n/*.png", "training/o/*.png", "training/p/*.png",
                    "training/r/*.png", "training/s/*.png", "training/t/*.png", "training/u/*.png", "training/v/*.png",
                    "training/w/*.png", "training/x/*.png", "training/y/*.png", "training/z/*.png"]
    for item in array_folder:
        files = glob.glob(item)
        for file in files:
            gen_data.gen_data(file, ord(item.split("/")[1]))
    print "training complete"
import numpy as np
from gen_data import gen_data
from plot import plot
from todo import func

no_iter = 1000  # number of iterations
no_train = 70   # Your code here # number of training data
no_test = 30    # Your code here # number of testing data
no_data = 100   # number of all data
assert (no_train + no_test == no_data)

cumulative_train_err = 0
cumulative_test_err = 0

for i in range(no_iter):
    X, y, w_f = gen_data(no_data)
    X_train, X_test = X[:, :no_train], X[:, no_train:]
    y_train, y_test = y[:, :no_train], y[:, no_train:]
    print(X_train)
    print(y_train)
    w_g = func(X_train, y_train)

    # Compute training, testing error
    # Your code here
    # Answer begin
    # e.g
    train_err = 0
    test_err = 0
    train_P, train_N = X_train.shape
    test_P, test_N = X_test.shape
    Xt = X_train.T
    for i in range(train_N):
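
# The error computation above is left as an exercise ("Your code here") and the
# snippet is truncated. The following is a minimal, hedged sketch of one way to
# finish it, assuming labels in {-1, +1}, samples stored as the columns of X,
# and w_g returned by func() as a (features x 1) weight vector. These are
# assumptions about the surrounding helpers, not the original answer.
def error_rate(w, X, y):
    """Fraction of samples whose predicted sign disagrees with the label."""
    pred = np.sign(w.T @ X)          # shape (1, n_samples)
    return np.mean(pred != y)


# train_err = error_rate(w_g, X_train, y_train)
# test_err = error_rate(w_g, X_test, y_test)
# cumulative_train_err += train_err
# cumulative_test_err += test_err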
import gen_data
import numpy as np
import model
import pandas

itol = lambda l: [l]

data = gen_data.gen_data(gen_data.get_dists(100), 100000, 10, .2, 1 / 5, .1)
train = (data[0][:70000], np.array(list(map(itol, data[1][:70000]))))
test = (data[0][70000:], np.array(list(map(itol, data[1][70000:]))))


def train_model():
    x = model.train(list(zip(*train)), list(zip(*test)), list(zip(*test)), 1024, [5, 5], epochs=50)
    return x


def get_res(x):
    res = np.array(list(map(lambda l: [l], x['results'][0][0])))
    return res


def get_pandas():
    return pandas.DataFrame(train[0]).assign(score=train[1])
def create_data(N):
    data_file = "./data/data.txt"
    query_file = "./data/query.txt"
    gen_data.gen_data(data_file, N)
    gen_data.gen_data(query_file, N)
print("Downloading previous data from google big query...") # #drop the pandas index that is added when uploading to gbq data_gbq = data_gbq.iloc[:,1:] data_last30 = data[data['date'] >= last30] #details for data extraction bucket = "verifly-adjust" days = 30 save_path = '/home/nick/adjust/data/verifly' #download and combine batched data from yesterday raw_data_last30 = gen_data.gen_data(bucket, days, save_path) # data_last30.to_csv(local_path + 'vfly_raw_11062019.csv') #transform batched data to metrics and save as csv agg_metrics, cohort_metrics = metrics.apply_metrics(raw_data_last30) data_last30_and_aggmetrics = data_last30.append(agg_metrics, ignore_index=True) final_data = merge_data(data_last_30_and_aggmetrics, cohort_metrics) #combine the two csv files. data was saved as csv as a poor solution to merging a multiindexed dataframe # data_gbq = pd.read_csv('/home/nick/adjust/data/verifly/deliverables/vfly-deliverables.csv') # data_gbq = data_gbq.iloc[:,1:] # print('base data ') # print(data_gbq.columns) # data_yesterday = pd.read_csv("deliverables_" + str(yesterday) + ".csv")
def train(args):
    tf_config = None
    tf_config_json = None
    cluster = None
    job_name = None
    task_index = 0
    ps_hosts = []
    worker_hosts = []
    config_file = False
    try:
        print(os.environ['TF_CONFIG'])
        config_file = True
    except KeyError:
        pass
    if config_file:
        tf_config = os.environ.get('TF_CONFIG', '{}')
        tf_config_json = json.loads(tf_config)
        cluster = tf_config_json.get('cluster', {})
        job_name = tf_config_json.get('task', {}).get('type', "")
        task_index = tf_config_json.get('task', {}).get('index', "")
        ps_hosts = cluster.get("ps")
        worker_hosts = cluster.get("worker")
    else:
        ps_hosts = args.ps_hosts.split(',')
        worker_hosts = args.worker_hosts.split(',')
        job_name = args.job_name
        task_index = args.task_index
    graph = tf.Graph()
    var_path = cwd + '/' + args.checkpoint_dir + '/variables/'
    # Create a cluster from the parameter server and worker hosts.
    cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})
    # Create and start a server for the local task.
    server = tf.train.Server(cluster, job_name=job_name, task_index=task_index)
    if job_name == "ps":
        server.join()
    elif job_name == "worker":
        # Assigns ops to the local worker by default.
        with tf.device(tf.train.replica_device_setter(
                worker_device="/job:worker/task:%d" % task_index, cluster=cluster)):
            with graph.as_default():
                # Graph object and scope created
                # ...now define all parts of the graph here
                feed_fwd_model = FF(args, graph)
                saver = tf.train.Saver()
                init = tf.global_variables_initializer()
                # Now that the graph is defined, create a session to begin running
                with tf.Session() as sess:
                    sess.run(init)
                    # Prepare to save the model
                    i = 0
                    model = 'model%s' % i
                    try:
                        os.makedirs(cwd + '/' + args.checkpoint_dir)
                    except OSError:
                        if not os.path.isdir(cwd + '/' + args.checkpoint_dir):
                            raise
                    ckpt_file_index = Path(cwd + '/' + args.checkpoint_dir + '/' + model + '.ckpt.index')
                    ckpt_file = Path(cwd + '/' + args.checkpoint_dir + '/' + model + '.ckpt')
                    while ckpt_file_index.is_file():
                        i += 1
                        model = 'model%s' % i
                        ckpt_file_index = Path(cwd + '/' + args.checkpoint_dir + '/' + model + '.ckpt.index')
                        ckpt_file = Path(cwd + '/' + args.checkpoint_dir + '/' + model + '.ckpt')
                    num_epochs = int(args.num_epochs)
                    y_acc = np.zeros((int(args.batch_size), int(args.num_outputs)))
                    loss = None
                    y_ = None
                    w = []
                    b = []
                    if args.restore_path is not None:
                        trained_model_saver, w, b = restore_graph(sess, args)
                        print('...continuing training')
                    # guards against accidental updates to the graph which can cause graph
                    # increase and performance decay over time (with more iterations)
                    sess.graph.finalize()
                    for e in range(num_epochs):
                        w, b, train_input, train_output = gen_data(
                            int(args.batch_size), int(args.num_inputs), w, b)
                        y_, loss, _ = sess.run(feed_fwd_model.run(),
                                               feed_dict={feed_fwd_model.x: train_input,
                                                          feed_fwd_model.y: train_output})
                        y_acc = y_
                        threshold = 1000
                        w_b_saved = False
                        if (e % 50) == 0:
                            print('epoch: %d - loss: %.2f' % (e, loss))
                        if e > 0 and (e % threshold == 0):
                            print('Writing checkpoint %d' % e)
                            print(train_output, w, b)
                            print('\n')
                            print(y_acc, sess.run(feed_fwd_model.weights)[0], sess.run(feed_fwd_model.biases)[0])
                            save_path, w_b_saved = checkpoint(sess, saver, ckpt_file, w, b,
                                                              w_b_saved, var_path, model, e)
                    # final checkpoint
                    save_path, w_b_saved = checkpoint(sess, saver, ckpt_file, w, b,
                                                      w_b_saved, var_path, model)
                    print('Model saved to %s' % str(save_path))
                    sess.close()
    x_train, x_test, y_train, y_test = train_test_split(X[:, :4], y, test_size=0.3)
    model = linreg.fit(x_train, y_train)
    n_predic_y_test = model.predict(x_test)
    plt.scatter(x_test[:, 1], n_predic_y_test, marker='x')

    # Get the result
    var = np.mean((n_predic_y_test - y_test) ** 2)
    error = np.mean(abs(n_predic_y_test - y_test))
    error_percentage = round(error / np.mean(abs(y_test)) * 100, 2)
    return var, error, error_percentage


if __name__ == '__main__':
    X, y = gen_data(10)
    linreg = LinearRegression()
    for i in range(len(y)):
        print(LR(y[i]))
'''
(0.0015366263968832226, 0.029558440011940437, 4.12)
(0.007524175125879509, 0.06941106394372618, 9.41)
(0.051449122033343286, 0.191328963800147, 23.79)
(0.20463488066853824, 0.33023352718617116, 56.65)
(0.11767823522992713, 0.28898224042626236, 28.81)
(0.32396914319907705, 0.4598516734961317, 55.71)
(0.2232187293220335, 0.39778403290574266, 82.12)
(1.578167663561671, 1.0617364678404835, 94.25)
(0.639188444611845, 0.6624016711869223, 60.31)
(0.4300697392308613, 0.5422248373416173, 51.63)
'''
def __init__(self, root, word_num):  # word_num is a variable
    self.root = root
    self.data = gen_data.gen_data("recite")
    self.word_num = word_num
    self.seq = 1
    inputs_test = pt.autograd.Variable(inputs_test)
    labels_test = pt.autograd.Variable(labels_test)
    random_e_test = pt.from_numpy(random_effect_test).float()
    outputs_test = model(inputs_test.float(), random_e_test)
    error_percentage = AccuarcyCompute(outputs_test, labels_test.float())
    if error_percentage[2] == last:
        repeat += 1
    else:
        last = error_percentage[2]
    if repeat > 5:
        break
    print("Testing", error_percentage)


if __name__ == '__main__':
    Xdata, y = gen_data(5, size=1000)
    Xdata = np.concatenate((np.ones((Xdata.shape[0], 1)), Xdata), axis=1).astype(float)
    for i in range(len(y)):
        EMNN(y[i])
'''
Testing (0.0015800932, 0.029433494, 4.1)
Testing (0.0029981958, 0.044504516, 4.91)
Testing (0.002603048, 0.0364661, 4.62)
Testing (0.002724695, 0.042232957, 6.26)
Testing (0.002901133, 0.038665723, 4.27)
Testing (0.0019755429, 0.036214557, 3.75)
Testing (0.002415244, 0.040518273, 6.68)
Testing (0.0023834745, 0.035962757, 3.54)
Testing (0.0021507577, 0.035938308, 3.04)
Testing (0.0031414144, 0.044444475, 3.79)
'''
if __name__ == '__main__':
    # import data that does not change between batches
    #conf_matrices = loadmat('conf_matrices.mat')
    #tmpCM = []
    #tmpCM1 = []
    #for iN in range(conf_matrices['conf_matrices'].size):
    #    tmpCM.append(conf_matrices['conf_matrices'][iN]['userID'][0][0][0])
    #    tmpCM1.append(conf_matrices['conf_matrices'][iN]['conf_matrix'][0])
    #conf_matrices = pd.DataFrame({'userID': tmpCM, 'conf_matrix': tmpCM1})
    retired_images = pd.DataFrame({'imageID': [], 'class': []})
    PP_matrices = pd.DataFrame({'imageID': [], 'pp_matrix': []})
    hold, conf_matrices = gen_data.gen_data()
    # for loop to iterate over each batch
    for i in range(1, 2):
        #batch_name = 'batch' + str(i) + '.mat'  # batch1.mat, batch2.mat, etc
        #batch = loadmat(batch_name)  # read batch file
        #tmpType = []
        #tmpLabels = []
        #tmpuserIDs = []
        #tmpTruelabel = []
        #tmpImageID = []
        #tmpML_posterior = []
        # Subtracting 1 off the index from the mat file for the "labels" so that the indexing works in python.
        #for iN in range(batch['images'].size):
        #    tmpType.append(batch['images'][iN]['type'][0][0])
        #    tmpLabels.append(batch['images'][iN]['labels'][0][0]-1)
# Get a guess
#lsmr_solver = LSMRsolver(data, guesses)
#embed()
# ================
#Solver1 = LBFGSsolver(data=data, guess=truth, truth=truth)
#Solver2 = LBFGSsolver(data=data, guess=guesses, truth=truth)
#prm = np.load("lsmr_solver_x.npy")
#prm[:2*lsmr_solver.Nhkl] = np.exp(prm[:2*lsmr_solver.Nhkl])
#guesses["Gprm"] = prm[2*lsmr_solver.Nhkl:]
# ..

if __name__ == "__main__":
    data = gen_data.gen_data(Nshot_max=500)
    guesses = gen_data.guess_data(data, perturbate=True)
    truth = gen_data.guess_data(data, perturbate=False)
    #prm = np.load("_temp_4.npz")
    #guesses["IAprm"] = prm["AmpA_final"]
    #guesses["IBprm"] = prm["AmpB_final"]
    #guesses["Gprm"] = prm["Gain_final"]
    #LogSolve = LogIsolver(data=data, guess=guesses, truth=truth)
    LogSolveCurve = LogIsolverCurve(use_curvatures=False, data=data, guess=guesses, truth=truth)
    embed()