def main():
    """Train an SVM on vectorised training data and report dev-set results.

    Loads ``data/train.csv`` and ``data/dev.csv``, builds the vocabulary
    indices, reads the pickled embeddings (``dict.pickle``) and mean vector
    (``mean.pickle``), converts both splits to vectors, fits ``svm.SVC`` and
    prints the dev-set predictions followed by the dev-set score.
    """
    train_data = loader.load_data('data/train.csv')
    valid_data = loader.load_data('data/dev.csv')
    print('Data fetched')

    vocab = make_vocab(train_data)
    word2index, _ = make_indices(vocab)

    # Context managers guarantee the pickle files are closed, even when
    # unpickling raises (the handles used to be leaked).
    # NOTE(review): pickle.load can execute arbitrary code; only load pickle
    # files that come from a trusted source.
    with open("dict.pickle", "rb") as embeddings_file:
        wordEmbeddings = pickle.load(embeddings_file)
    with open("mean.pickle", "rb") as mean_file:
        meanVec = pickle.load(mean_file)

    # The id column returned for each split is not needed here.
    _, train, train_labels = convert_to_vector_representation(
        train_data, word2index, wordEmbeddings, meanVec)
    _, valid, valid_labels = convert_to_vector_representation(
        valid_data, word2index, wordEmbeddings, meanVec)

    classifier = svm.SVC()
    print('Training')
    classifier.fit(train, train_labels)
    print('Evaluating')
    print(classifier.predict(valid))
    print(classifier.score(valid, valid_labels))
def __init__(self, train_file, test_file, addnoise, noise_scale = 0.0):
    """Load the training and test files and select the training split.

    Raises ``Exception`` when either path is not an existing file. When
    ``addnoise`` is truthy, noise drawn from ``np.random.normal(0,
    noise_scale)`` is added in place to the training targets. ``self.x``,
    ``self.t`` and ``self.y`` start out bound to the training arrays.
    """
    required_files = (
        (train_file, 'No Such Training File'),
        (test_file, 'No Such Test File'),
    )
    # Checked in order, so a missing training file is reported first.
    for data_path, complaint in required_files:
        if not os.path.isfile(data_path):
            raise Exception(complaint)

    self.train_x, self.train_t, self.train_y = load_data(train_file)
    if addnoise:
        noise = np.random.normal(0, noise_scale, size = self.train_y.shape)
        self.train_y += noise
    self.test_x, self.test_t, self.test_y = load_data(test_file)

    self.x, self.t, self.y = self.train_x, self.train_t, self.train_y
def import_aq_database(opts, force=False):
    """Export the source SQL database and load it into an AQ database.

    Creates the AQ directory layout, generates the ini file and the base
    description, exports the table data, then invokes the loader.
    """
    exec_sql = ExecuteSQL.ExecuteSQL(
        opts.db_type, opts.db_host, opts.db_user, opts.db_pass, opts.db_name)
    create_db_directories(opts.aq_db_path, opts.aq_db_name, force)
    db_ini_filename = generate_ini(
        opts.aq_db_path, opts.aq_db_name, opts.aq_engine, opts.aq_loader)

    # Root directory of this AQ database; both output paths hang off it.
    db_root = opts.aq_db_path + "/" + opts.aq_db_name
    base_desc_path = db_root + "/base_struct/base.aqb"
    tables_dir = db_root + "/data_orga/tables/"

    generate_base_desc(exec_sql, opts.aq_db_name, base_desc_path)
    export_data(exec_sql, tables_dir)
    loader.load_data(opts.aq_tools, opts.aq_db_name)  # FIXME
def predict(args):
    """Predict on a given dataset and write the result to ``preds.csv``.

    Uses ``args.private_file`` when it is set, otherwise a 100000-row sample
    of ``DATA_NAME``. Predictions are written, indexed by the preprocessor's
    index, to ``<args.out_path>/preds.csv`` with ``;`` as separator.
    """
    if args.private_file:
        data = load_data(args.private_file)
    else:
        data = load_data(DATA_NAME, samp_size=100000, all_=False)

    features, row_index = ChrunPrep().transform(data)
    model = Modelling(args.model_type)
    predictions = model.predict(features)

    maybe_mkdir(args.out_path)
    destination = os.path.join(args.out_path, "preds.csv")
    pd.Series(predictions, index=row_index).to_csv(destination, sep=";")
def create_pickle_dataset():
    """Build the combined dataset dict, pickle it and return it.

    The test split is passed through ``split_test_dataset``; the training
    arrays and class names are then added under their own keys (existing
    keys are left untouched because ``setdefault`` is used).
    """
    loader.maybe_download_and_extract()

    test_images, test_cls, test_labels = loader.load_data("test")
    dataset = split_test_dataset(test_images, test_cls, test_labels)

    train_images, train_cls, train_labels = loader.load_data("train")
    additions = (
        ('train_images', train_images),
        ('train_labels', train_labels),
        ('train_cls', train_cls),
        ('class_names', loader.labels),
    )
    for key, value in additions:
        dataset.setdefault(key, value)

    to_pickle(dataset)
    return dataset
def main():
    """Fit the cuisine language model and write predictions for the test set.

    Both splits go through ``lm_cook_processer`` and ``clean_data``; the model
    is fitted on the ``ingredients`` / ``cuisine`` columns and the predictions
    are handed to ``pred_to_out`` together with the test index.
    """
    train = loader.load_data()
    train = clean_data(lm_cook_processer(train))
    test = loader.load_data(test=True)
    test = clean_data(lm_cook_processer(test, test=True))

    # The language-model parameter `a` was previously explored by running
    # cross_validation(train, 'ingredients', 'cuisine', a) for
    # a in [0.05, 0.1, 0.15, 0.2, 0.25, 0.3].
    model = lm(train, 'ingredients', 'cuisine')

    # Predict with the fitted instance. The previous code called
    # `lm.predict(...)` on the class itself and never used `model`.
    # NOTE(review): assumes `lm` is a class whose instances expose
    # `predict(ingredients)` -- confirm against the lm definition.
    pred = model.predict(test['ingredients'])
    pred_to_out(pred, test.index)
def train(rundir, diagnosis, epochs, learning_rate, use_gpu):
    """Train an MRNet and checkpoint it whenever validation loss improves.

    Each epoch runs one training pass and one validation pass, prints loss
    and AUC for both, and steps the plateau scheduler on the validation loss.
    Checkpoints are written into ``rundir`` with the losses and epoch number
    encoded in the file name.
    """
    train_loader, valid_loader, _ = load_data(diagnosis, use_gpu)

    model = MRNet()
    if use_gpu:
        model = model.cuda()

    optimizer = torch.optim.Adam(
        model.parameters(), learning_rate, weight_decay=.01)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, patience=5, factor=.3, threshold=1e-4)

    best_val_loss = float('inf')
    start_time = datetime.now()

    for epoch in range(epochs):
        elapsed = datetime.now() - start_time
        print('starting epoch {}. time passed: {}'.format(epoch+1, str(elapsed)))

        train_loss, train_auc, _, _ = run_model(
            model, train_loader, train=True, optimizer=optimizer)
        print(f'train loss: {train_loss:0.4f}')
        print(f'train AUC: {train_auc:0.4f}')

        val_loss, val_auc, _, _ = run_model(model, valid_loader)
        print(f'valid loss: {val_loss:0.4f}')
        print(f'valid AUC: {val_auc:0.4f}')

        scheduler.step(val_loss)

        # Only a strict improvement triggers a checkpoint.
        if not (val_loss < best_val_loss):
            continue
        best_val_loss = val_loss
        checkpoint_name = f'val{val_loss:0.4f}_train{train_loss:0.4f}_epoch{epoch+1}'
        torch.save(model.state_dict(), Path(rundir) / checkpoint_name)
def build_dataset(self, msg_queue):
    """Load the configured datasets, build the train/test sets and save them.

    Progress text is pushed onto ``msg_queue`` after each stage. Once the
    sets are saved, the user is asked whether statistics of the test set
    should be saved as well. Finally the manager is updated.
    """
    divider = "-" * 50 + "\n"
    save_func = save.get_save_func(self.final["DATASET_FILE_EXTENSION"])

    # Stage 1: load the individual datasets.
    msg_queue.put("Loading datasets. This may take a while. \n")
    datasets = loader.load_data(self.final["CONFIGS"], msg_queue)
    msg_queue.put("Finished loading datasets! \n" + divider)

    # Stage 2: construct the new dataset.
    msg_queue.put("Constructing new dataset \n")
    training_set, test_set = construct.get_subset(datasets, self.final)
    msg_queue.put("Finished Constructing datasets! \n" + divider)

    # Stage 3: persist both sets.
    msg_queue.put("Saving dataset \n")
    training_path = self.get_save_path(
        self.final["DATASET_NAME"] + "_training_set")
    save_func(training_path, training_set)
    test_path = self.get_save_path(self.final["DATASET_NAME"] + "_test_set")
    save.save_multiple(test_set, test_path, save_func)
    msg_queue.put("Finished saving dataset \n" + divider)

    # Stage 4 (optional): statistics generated from the test set.
    question = "Do you want to save dataset statistics ?"
    if messagebox.askyesno("SAVE", question):
        msg_queue.put("Saving dataset statistics \n")
        datasets_stats = stats.generate_stats(test_set)
        stats_path = self.get_save_path("test_set_stats")
        save_func(stats_path, datasets_stats,
                  fieldnames=datasets_stats[0].keys())
        msg_queue.put("Finished saving statistics \n" + divider)

    self.manager.update()
def svm_baseline():
    """Fit an SVM on the training split and print its test-set accuracy.

    Prints the number of correct predictions out of the test-set size and
    the accuracy as a percentage.
    """
    training_data, validation_data, test_data = loader.load_data()

    # Train. The earlier np.asmatrix conversions were removed: their results
    # were never used and one of them overwrote `te_x` by mistake.
    clf = svm.SVC()
    clf.fit(training_data[0], training_data[1])

    # Test.
    predictions = [int(a) for a in clf.predict(test_data[0])]
    num_correct = sum(int(a == y) for a, y in zip(predictions, test_data[1]))
    total = len(test_data[1])

    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("Baseline classifier using an SVM.")
    print("%s of %s values correct." % (num_correct, total))
    accuracy = num_correct / (total * 1.0) * 100
    print("accuracy - %f" % accuracy)
def evaluate(path, split, model_path, use_gpu):
    """Evaluate a saved TripleMRNet on one split and print loss and AUCs.

    Returns the ``(preds, labels)`` pair produced by ``run_model``.
    Raises ``ValueError`` when ``split`` is not 'train', 'valid' or 'test'.
    """
    train_loader, valid_loader, test_loader = load_data(path, use_gpu)

    model = TripleMRNet()
    device_map = None if use_gpu else 'cpu'
    model.load_state_dict(torch.load(model_path, map_location=device_map))
    if use_gpu:
        model = model.cuda()

    candidates = (
        ('train', train_loader),
        ('valid', valid_loader),
        ('test', test_loader),
    )
    for split_name, candidate in candidates:
        if split == split_name:
            chosen_loader = candidate
            break
    else:
        raise ValueError("split must be 'train', 'valid', or 'test'")

    loss, auc, accuracy, preds, labels = run_model(model, chosen_loader)

    print(f'{split} loss: {loss:0.4f}')
    print(f'{split} AUC_abnormal: {auc[0]:0.4f}')
    print(f'{split} AUC_acl: {auc[1]:0.4f}')
    print(f'{split} AUC_meniscus: {auc[2]:0.4f}')

    return preds, labels
def main():
    """
    Evaluate the perceptron and KNN classifiers on the Iris species dataset.

    The four measurement columns of ``Iris.csv`` are the features and
    ``Species`` is the label; the data is split with ``ratio=.25`` and both
    classifiers are run on the same split.
    """
    feature_columns = [
        "SepalLengthCm",
        "SepalWidthCm",
        "PetalLengthCm",
        "PetalWidthCm",
    ]
    x, y, type2id = loader.load_data(
        'Iris.csv', y_label="Species", x_labels=feature_columns)

    # Split into 75% train & 25% test
    train_x, train_y, test_x, test_y = loader.split_data(x, y, ratio=.25)
    split = (train_x, train_y, test_x, test_y)

    print("RUNNING Perceptron: ")
    run_classifier(*split, Classifier=Perceptron, Param='N')

    print("RUNNING KNN: ")
    run_classifier(*split, Classifier=KNN, Param='K')
def __init__(self, epoch=1000, lr=0.0001):
    """Prepare the LeNet-5 trainer: data loaders, network, loss and optimizer.

    Args:
        epoch: value stored on ``self.epoch``.
        lr: learning rate passed to the Adam optimizer.

    If ``lenet.pth`` exists in the working directory its state dict is
    loaded into the network before training starts.
    """
    super(TrainLeNet, self).__init__()
    print("训练准备.......")

    # Binary model (checkpoint) file.
    self.model_file = "lenet.pth"
    # True when a CUDA device is available.
    self.CUDA = torch.cuda.is_available()

    self.train_loader, self.test_loader = loader.load_data()
    self.net = LeNet5()
    if self.CUDA:
        self.net.cuda()

    if os.path.exists(self.model_file):
        print("加载本地模型")
        # Load the local model. Without map_location a checkpoint that was
        # saved from a GPU cannot be restored on a CPU-only machine.
        state = torch.load(
            self.model_file,
            map_location=(None if self.CUDA else 'cpu'))
        self.net.load_state_dict(state)

    # Training parameters.
    self.epoch = epoch
    self.lr = lr

    # Loss function.
    self.loss_function = torch.nn.CrossEntropyLoss()
    # Optimizer with the configured learning rate.
    self.optimizer = torch.optim.Adam(self.net.parameters(), self.lr)

    if self.CUDA:
        self.loss_function = self.loss_function.cuda()
def evaluate(split, model_name, model_path, augment, use_gpu):
    """Evaluate a saved network built by ``NetFactory`` on one data split.

    Prints the split's loss and AUC and returns ``(preds, labels)``.
    Raises ``ValueError`` when ``split`` is not 'train', 'valid' or 'test'.
    """
    train_loader, valid_loader, test_loader = load_data(augment, use_gpu)
    writer = SummaryWriter()

    model = NetFactory.createNet(model_name)
    weights = torch.load(
        model_path, map_location=(None if use_gpu else 'cpu'))
    model.load_state_dict(weights)
    if use_gpu:
        model = model.cuda()

    if split == 'train':
        selected = train_loader
    elif split == 'valid':
        selected = valid_loader
    elif split == 'test':
        selected = test_loader
    else:
        raise ValueError("split must be 'train', 'valid', or 'test'")

    loss, auc, preds, labels = run_model(writer, 1, model, selected)

    for line in (f'{split} loss: {loss:0.4f}', f'{split} AUC: {auc:0.4f}'):
        print(line)

    return preds, labels
def evaluate(path, split, angle, face, model_path, use_gpu, filename):
    """Evaluate a saved CNNNet on one tile set and plot the per-class result.

    Args:
        path: passed to ``load_data``.
        split: one of 'tileA', 'tileB' or 'tileD'.
        angle, face, filename: forwarded to ``plot_class``.
        model_path: checkpoint holding the model state dict.
        use_gpu: move the model to CUDA when truthy.

    Returns:
        ``(preds, labels, acc)`` where ``acc`` is what ``plot_class`` returns.

    Raises:
        ValueError: if ``split`` is not one of the three tile names.
    """
    # Only the three tile datasets are evaluated here.
    _, _, _, data_A, data_B, data_D = load_data(path)

    model = CNNNet()
    state_dict = torch.load(
        model_path, map_location=(None if use_gpu else 'cpu'))
    model.load_state_dict(state_dict)
    if use_gpu:
        model = model.cuda()

    if split == 'tileA':
        dataset = data_A
    elif split == 'tileB':
        dataset = data_B
    elif split == 'tileD':
        dataset = data_D
    else:
        # The message used to list 'train', 'valid', 'test', none of which
        # this function accepts.
        raise ValueError("split must be 'tileA', 'tileB', or 'tileD'")

    loader = DataLoader(dataset, batch_size=32, num_workers=12, shuffle=False)

    _, _, preds, labels = run_model(model, loader)

    # figure
    acc = plot_class(split, angle, face, preds, labels, filename)
    return preds, labels, acc
def main():
    """Fit the cuisine language model and write predictions for the test set.

    Both splits go through ``lm_cook_processer`` and ``clean_data``;
    ``lm.fit`` produces the model that ``lm.predict`` then applies to the
    test ingredients.
    """
    raw_train = loader.load_data()
    train = clean_data(lm_cook_processer(raw_train))
    raw_test = loader.load_data(test=True)
    test = clean_data(lm_cook_processer(raw_test,test=True))

    # The language-model parameter `a` was previously explored by running
    # cross_validation(train, 'ingredients', 'cuisine', a) for
    # a in [0.05, 0.1, 0.15, 0.2, 0.25, 0.3].
    fitted = lm.fit(train,'ingredients','cuisine')
    predictions = lm.predict(test['ingredients'],fitted)
    pred_to_out(predictions,test.index)
def makepredictions(modelpath,outputfile,training_dir):
    """Run a saved Keras model over a directory and save its predictions.

    Each row written to ``outputfile`` holds: running index, arg-max of the
    true label vector, and the first three prediction columns.
    """
    image_num = 16000
    val_split = 0
    batch_size = 11

    X_test_filenames, y_test = load_data(training_dir, image_num, val_split)
    print(np.argmax(y_test,axis=1))

    test_generator = Generator(X_test_filenames, y_test, batch_size)
    reconstructed_model = keras.models.load_model(modelpath)
    reconstructed_model.summary()
    prediction = reconstructed_model.predict_generator(test_generator)

    # Disabled analysis kept for reference: the three largest errors of
    # dogs classified as wildlife.
    # label = np.argmax(y_test,axis=1)
    # wildlife = prediction[:,1]
    # wilddogs = wildlife[label==2]
    # dogfiles = X_test_filenames[label==2]
    # indices = wilddogs.argsort()[-3:][::-1]
    # print(indices)
    # print(wilddogs[indices])
    # print(dogfiles[indices])

    row_ids = np.arange(len(y_test))
    true_labels = np.argmax(y_test,axis=1)
    score_columns = tuple(np.array(prediction[:,k]) for k in (0, 1, 2))
    table = np.vstack((row_ids, true_labels) + score_columns).T
    np.savetxt(outputfile, table)
def __init__(self, args):
    """Store ``args``, load the data and create the empty bookkeeping state."""
    self.args = args
    self.timeline = deque()
    self.data = load_data(args)
    self.bucket = dict()
    # "is dominated by" edges; the global result can be found with a DFS.
    self.topo_graph = dict()
    # "dominates" edges; used to look up the fathers of a node.
    self.reverse_topo_graph = dict()
def evaluate(split, model_path, diagnosis, use_gpu):
    """Evaluate a saved MRNet on one split; print loss/AUC, return outputs.

    Returns ``(preds, labels)`` from ``run_model``. Raises ``ValueError``
    when ``split`` is not 'train', 'valid' or 'test'.
    """
    train_loader, valid_loader, test_loader = load_data(diagnosis, use_gpu)

    model = MRNet()
    target_device = None if use_gpu else 'cpu'
    model.load_state_dict(torch.load(model_path, map_location=target_device))
    if use_gpu:
        model = model.cuda()

    named_loaders = (
        ('train', train_loader),
        ('valid', valid_loader),
        ('test', test_loader),
    )
    matches = [candidate for name, candidate in named_loaders if split == name]
    if not matches:
        raise ValueError("split must be 'train', 'valid', or 'test'")
    active_loader = matches[0]

    loss, auc, preds, labels = run_model(model, active_loader)

    print(f'{split} loss: {loss:0.4f}')
    print(f'{split} AUC: {auc:0.4f}')

    return preds, labels
def evaluate(split, model_dir, use_gpu=True,
             checkpoint_path='/home/Mara/run_baseline_acl_meniscus_gap/'
                             'val0.3271_train0.2068_epoch22'):
    """Evaluate a saved ``Combine`` model on one split and print loss/AUCs.

    Args:
        split: 'train', 'valid' or 'test'.
        model_dir: forwarded to ``load_data`` as its first argument.
        use_gpu: move the model to CUDA when truthy.
        checkpoint_path: state-dict file to restore. Defaults to the path
            that used to be hard-coded, so existing callers are unaffected.

    Returns:
        ``(preds, labels, model, loader)``.

    Raises:
        ValueError: if ``split`` is not one of the three split names.
    """
    model = Combine()
    if use_gpu:
        model = model.cuda()
    state_dict = torch.load(
        checkpoint_path, map_location=(None if use_gpu else 'cpu'))
    model.load_state_dict(state_dict)

    train_loader, valid_loader, test_loader = load_data(model_dir, use_gpu)

    if split == 'train':
        loader = train_loader
    elif split == 'valid':
        loader = valid_loader
    elif split == 'test':
        loader = test_loader
    else:
        raise ValueError("split must be 'train', 'valid', or 'test'")

    loss, auc, accuracy, preds, labels = run_model(model, loader)

    print(f'{split} loss: {loss:0.4f}')
    print(f'{split} AUC_acl: {auc[0]:0.4f}')
    print(f'{split} AUC_meniscus: {auc[1]:0.4f}')

    return preds, labels, model, loader
def raw_value_comparison(coh, plot=False):
    """Compare hg38 and hg19 gene-level raw copy-number values for ``coh``.

    The per-gene average of (hg38 - hg19) is computed, and every gene whose
    average lies more than four standard deviations (of those averages)
    above or below zero is returned as ``[coh, 'Pos'|'Neg', gene, average]``.
    With ``plot=True`` a histogram of the averages is saved as
    ``<coh>_genehist.pdf`` and ``<coh>_genehist.png``.
    """
    # load in the data
    df_38, df_19 = loader.load_data(hg38_dir, hg19_dir, coh, thresh=False)

    # average sample-by-sample difference for each gene
    delta = df_38 - df_19
    avg_diff = {}
    for gene in delta.columns.get_level_values('Gene Symbol'):
        avg_diff[gene] = np.average(delta[gene])

    # flag genes beyond the 4*std threshold in either direction
    std = np.std([avg_diff[gene] for gene in avg_diff])
    results = []
    for gene in avg_diff:
        gene_avg = avg_diff[gene]
        if gene_avg > 4 * std:
            direction = 'Pos'
        elif gene_avg < -4 * std:
            direction = 'Neg'
        else:
            continue
        results.append([coh, direction, gene, gene_avg])

    if plot:
        plt.hist([avg_diff[gene] for gene in avg_diff], bins=1000)
        plt.title(coh, fontsize=16)
        plt.xlabel('Average CN Difference Between Alignments', fontsize=14)
        plt.ylabel('Genes', fontsize=14)
        sns.despine()
        for extension in ('.pdf', '.png'):
            plt.savefig(coh + '_genehist' + extension)
        plt.clf()

    return results
def run_test():
    """Load the documents and print how many were loaded."""
    print(' loading data ...')
    documents = load_data()

    # Extraction passes that can be enabled one at a time:
    # results = [find_full_line_names(d) for d in documents]
    # results = [find_dates(d) for d in documents]
    # results = [find_locations(d) for d in documents]

    document_count = len(documents)
    print(document_count)
def main():
    """Run KNN (9 passed to ``predict``) on a 5000/1000 load and print accuracy."""
    train_data, train_label, test_data, test_label = load_data(5000, 1000)

    classifier = KNN(train_data, train_label)
    predicted = classifier.predict(test_data, 9)
    num_correct = np.sum(predicted == test_label)

    report = (
        "KNN",
        f"Correct:\t{num_correct} / {len(test_label)}",
        f"Accuracy:\t{num_correct / len(test_label)}",
    )
    for line in report:
        print(line)
def svm_baseline():
    """Fit an SVM on the training split and print how many test items it gets right."""
    training_data, validation_data, test_data = loader.load_data()

    clf = svm.SVC()
    clf.fit(training_data[0], training_data[1])

    predictions = [int(a) for a in clf.predict(test_data[0])]
    num_correct = sum(int(a == y) for a, y in zip(predictions, test_data[1]))

    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("Baseline classifier using an SVM.")
    # The `%` operator was missing here, so the format string was *called*
    # with the tuple and raised TypeError instead of printing.
    print("%s of %s values correct." % (num_correct, len(test_data[1])))
def main():
    """Load every benchmark result file and draw all comparison plots.

    NOTE(review): the source was collapsed onto one line; the final
    ``draw_testbatches('hybrid_t', ...)`` call is placed after the loop over
    ``data_sets`` -- confirm against the original layout.
    """
    load = loader.load_data

    data = {}
    data['JIT'] = load('output/results_jit.csv', jit=True)
    data['JIT -L'] = load('output/results_jit(-L).csv')
    data['Interpreter'] = load('output/results_interpreter.csv')
    data['Hybrid'] = load('output/results_hybrid.csv')
    data['Hybrid -L'] = load('output/results_hybrid(-L).csv')
    data['Hybrid -LS'] = load('output/results_hybrid(-LS).csv')

    # Hybrid runs for each -T value.
    t_vals = [1, 10, 100, 1000, 10000]
    data_hybrid_t = {}
    for t in t_vals:
        label = 'Hybrid -L -T%d' % t
        data_hybrid_t[label] = load('output/results_hybrid(-L-T%d).csv' % t)

    col_map = plot.make_col_map(data)

    pick = data_proc.select
    data_sets = {
        'all' : data,
        'emulators' : pick(data, ['JIT -L', 'Interpreter', 'Hybrid -L']),
        'jit' : pick(data, ['JIT', 'JIT -L']),
        'hybrid' : pick(data, ['Hybrid', 'Hybrid -L', 'Hybrid -LS']),
        'single/jit' : pick(data, ['JIT']),
    }

    draw_vs_scatters(data)
    draw_histograms(data)

    for set_name, subset in data_sets.items():
        draw_testbatches(set_name, subset, col_map)
        draw_scatters(set_name, subset)

    draw_testbatches('hybrid_t', data_hybrid_t)
def test_plot_image(self):
    """Load the configured dataset and plot one randomly chosen training image."""
    x_train, _, _, _ = loader.load_data(
        param_config.PATH,
        param_config.X_PKL,
        param_config.Y_PKL,
        param_config.INCEPT_WIDTH,
        param_config.INCEPT_HEIGHT
    )
    # random.randint includes its upper bound, so randint(0, len(x_train))
    # could return len(x_train) and raise IndexError; randrange excludes it.
    chosen = random.randrange(len(x_train))
    loader.plot_image(x_train[chosen])
def get_symbol_classifier():
    """Return a 10-neighbour KNN classifier fitted on features of ``./data/``."""
    samples, labels = load_data("./data/")
    features = get_features(samples)

    classifier = KNeighborsClassifier(
        n_neighbors=10,
        weights='uniform',
        n_jobs=4,
    )
    classifier.fit(features, labels)
    return classifier
def main():
    """Train the SVM on a 5000/1000 load and print its test accuracy.

    The SVM is built from the feature width of the training data and the
    value 10, then ``train`` is called with 1000 as its last argument.
    """
    train_data, train_label, test_data, test_label = load_data(5000, 1000)

    feature_count = train_data.shape[1]
    model = SVM(feature_count, 10)
    model.train(train_data, train_label, 1000)

    predicted = model.predict(test_data)
    num_correct = np.sum(predicted == test_label)

    report = (
        "SVM",
        f"Correct:\t{num_correct} / {len(test_label)}",
        f"Accuracy:\t{num_correct / len(test_label)}",
    )
    for line in report:
        print(line)
def preprocess_data(size):
    """Load ``size`` samples, shuffle them and split 90/10 into train/validation.

    Returns ``((train_inputs, train_targets), (valid_inputs, valid_targets))``
    as NumPy arrays, where an input is element 1 of a sample and a target is
    the first item of the sample's last element.
    """
    cutoff = int(size * 0.9)
    samples = loader.load_data(size)
    random.shuffle(samples)

    def as_arrays(rows):
        inputs = np.array([row[1] for row in rows])
        targets = np.array([row[-1][0] for row in rows])
        return inputs, targets

    training_part = samples[:cutoff]
    validation_part = samples[cutoff:]
    return as_arrays(training_part), as_arrays(validation_part)
def import_aq_database(opts, force=False):
    """Export the source SQL database and load it into an AQ database.

    Steps, in order: create the directory layout, generate the ini file,
    generate the base description, export the table data, run the loader.
    """
    connection_args = (
        opts.db_type, opts.db_host, opts.db_user, opts.db_pass, opts.db_name)
    exec_sql = ExecuteSQL.ExecuteSQL(*connection_args)

    create_db_directories(opts.aq_db_path, opts.aq_db_name, force)
    db_ini_filename = generate_ini(
        opts.aq_db_path, opts.aq_db_name, opts.aq_engine, opts.aq_loader)

    # Both outputs live below <aq_db_path>/<aq_db_name>.
    database_dir = opts.aq_db_path + '/' + opts.aq_db_name
    generate_base_desc(
        exec_sql, opts.aq_db_name, database_dir + '/base_struct/base.aqb')
    export_data(exec_sql, database_dir + '/data_orga/tables/')

    loader.load_data(opts.aq_tools, opts.aq_db_name)  # FIXME
def data_to_hdf5(_):
    """Load the calendar, prices and sales frames and store them in one HDF5 file.

    The single positional argument is ignored.
    """
    from loader import load_data

    print('Loading data...')
    calendar, prices, sales = load_data()

    print('Saving as hdf5...')
    frames = (('calendar', calendar), ('prices', prices), ('sales', sales))
    for key, frame in frames:
        frame.to_hdf('data/data.h5', key=key)
def train(rundir, path, epochs, learning_rate, use_gpu):
    """Train a TripleMRNet and checkpoint it whenever validation loss improves.

    Each epoch prints loss and the three AUC values (abnormal, acl,
    meniscus) for the training and validation passes, then steps the
    plateau scheduler on the validation loss.
    """
    rundir = rundir + '/'
    train_loader, valid_loader, _ = load_data(path, use_gpu)

    model = TripleMRNet()
    if use_gpu:
        model = model.cuda()

    optimizer = torch.optim.Adam(
        model.parameters(), learning_rate, weight_decay=0.01)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, patience=5, factor=.3, threshold=1e-5)

    best_val_loss = float('inf')
    start_time = datetime.now()

    for epoch in range(epochs):
        elapsed = datetime.now() - start_time
        print('starting epoch {}. time passed: {}\n'.format(
            epoch + 1, str(elapsed)))

        # Training pass (per-task accuracies are returned but not printed).
        train_loss, train_auc, train_accuracy, _, _ = run_model(
            model, train_loader, train=True, optimizer=optimizer)
        print(f'train loss: {train_loss:0.4f}')
        print(f'train AUC_abnormal: {train_auc[0]:0.4f}')
        print(f'train AUC_acl: {train_auc[1]:0.4f}')
        print(f'train AUC_meniscus: {train_auc[2]:0.4f}\n')

        # Validation pass.
        val_loss, val_auc, val_accuracy, _, _ = run_model(model, valid_loader)
        print(f'valid loss: {val_loss:0.4f}')
        print(f'valid AUC_abnormal: {val_auc[0]:0.4f}')
        print(f'valid AUC_acl: {val_auc[1]:0.4f}')
        print(f'valid AUC_meniscus: {val_auc[2]:0.4f}\n')

        scheduler.step(val_loss)

        # Only a strict improvement triggers a checkpoint.
        if not (val_loss < best_val_loss):
            continue
        best_val_loss = val_loss
        checkpoint_name = f'val{val_loss:0.4f}_train{train_loss:0.4f}_epoch{epoch+1}'
        torch.save(model.state_dict(), Path(rundir) / checkpoint_name)
def fit(args):
    """Fit the preprocessor and the model on a 10000-row sample of ``DATA_NAME``."""
    data = load_data(DATA_NAME, samp_size=10000, all_=False)

    preprocessor = ChrunPrep()
    features = preprocessor.fit_transform(data)
    labels = preprocessor.create_labels(data)

    model = Modelling(model=args.model_type)
    model.fit(features, labels)
def main():
    """Run k-means on the input image for 2, 4, 8 and 16 centroids and plot each result.

    The image path and the two leading output dimensions are read from
    ``sys.argv``.
    """
    img = loader.load_data(sys.argv[FIRST_ARG])

    for exponent in (1, 2, 3, 4):
        cluster_count = np.power(2, exponent)
        centroids = init_centroids.init_centroids(cluster_count)
        model = k_means.KMeans(centroids, img)
        clustered = model.algorithm(EPOCH)

        target_shape = (
            int(sys.argv[SEC_ARG]), int(sys.argv[THIRD_ARG]), RGB_SIZE)
        plot.plot(np.reshape(clustered, target_shape))
def main():
    """Fit an SVC on TF-IDF features and write predictions for the test set."""
    raw_train = loader.load_data()
    ref, (train_x, train_y) = clean_data(tfidf_cook_processer(raw_train))

    test = loader.load_data(test=True)
    test_x = clean_data(tfidf_cook_processer(test,test=True),test=True)

    # Hyper-parameters obtained from cross validation.
    svc_settings = {'gamma': 1, 'C': 3.1622776601683795, 'probability': True}
    clf = SVC(**svc_settings)
    # print cross_validation.cvScore(clf, train_x, train_y).mean()

    # Alternative tried earlier (random forest, rank 1078):
    # clf = RandomForestClassifier(n_estimators=100)

    clf.fit(train_x,train_y)
    predictions = clf.predict(test_x)
    pred_to_out(predictions,ref,test)
def augment(n=4):
    """Write augmented copies of every image directory, ``n`` passes each.

    For each pass and directory a throw-away model is fitted for one epoch
    on the generator's flow; the augmented images are a side effect of
    ``save_to_dir`` and land in ``<image_dir><data_dir[:-1]>A``, prefixed
    with ``_<pass index>``.

    NOTE(review): the source was collapsed onto one line; the nesting below
    (everything inside the inner loop) is reconstructed -- confirm.
    """
    for i in range(n):
        for image_dir in image_dirs:
            # Dummy model: only needed to drive the generator.
            model = Sequential()
            model.add(Convolution2D(
                1, 1, 1, input_shape=(1, img_rows, img_cols)))
            model.add(Flatten())
            model.add(Dense(4))
            model.compile(loss='mse', optimizer='SGD')

            datagen = ImageDataGenerator(
                featurewise_center=True,
                featurewise_std_normalization=False,
                # Seems like not working at all!
                rotation_range=180,
                zca_whitening=False,
                # shear_range=0.3,
                # zoom_range=0.1,
                # width_shift_range=0.1,
                # height_shift_range=0.1,
                horizontal_flip=True,
                vertical_flip=True)

            (X_train, Y_train), (X_test, Y_test) = load_data(
                False, [image_dir])

            save_path = image_dir + data_dir[:-1] + 'A'
            if not path.exists(save_path):
                # A stray "vim-gas" token after this call (a syntax error)
                # was removed.
                makedirs(save_path)

            datagen.fit(X_train)
            model.fit_generator(
                datagen.flow(X_train, Y_train,
                             save_to_dir=save_path,
                             save_prefix='_' + str(i),
                             save_format='png'),
                samples_per_epoch=X_train.shape[0],
                nb_epoch=1)
hyperParameters = Params()


def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    """Yield ``(inputs, targets)`` mini-batches of exactly ``batchsize`` items.

    Items left over after the last full batch are not yielded. With
    ``shuffle`` the batches are drawn from a random permutation of the
    indices; otherwise they are consecutive slices.
    """
    assert len(inputs) == len(targets)
    order = None
    if shuffle:
        order = np.arange(len(inputs))
        np.random.shuffle(order)
    last_start = len(inputs) - batchsize + 1
    for begin in range(0, last_start, batchsize):
        end = begin + batchsize
        selection = order[begin:end] if shuffle else slice(begin, end)
        yield inputs[selection], targets[selection]


print("Loading data...")
data = loader.load_data()
x_train = data['X_train']
y_train = data['Y_train']
x_test = data['X_test']
y_test = data['Y_test']

# Symbolic inputs: a 4-D tensor of inputs and an integer vector of targets.
input_var = T.tensor4('inputs')
target_var = T.ivector('targets')

print("Building model and compiling functions...")
network = networks.build_cnn(input_var)

# Mean categorical cross-entropy of the network output.
prediction = lasagne.layers.get_output(network)
loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
loss = loss.mean()
# We could add some weight decay as well here, see lasagne.regularization.
def test_mlp(
        initial_learning_rate,
        learning_rate_decay,
        squared_filter_length_limit,
        n_epochs,
        batch_size,
        mom_params,
        activations,
        dropout,
        dropout_rates,
        results_file_name,
        layer_sizes,
        dataset,
        use_bias,
        random_seed=1234):
    """
    Train an ``MLP_Dropout`` with momentum SGD and log validation errors.

    The dataset is the one from the mlp demo on deeplearning.net.  This training
    function is lifted from there almost exactly.

    One validation-error total per epoch is appended to ``results_file_name``.
    ``mom_params`` must provide the keys "start", "end" and "interval";
    ``dropout`` selects which cost (dropout or plain) is differentiated and
    reported.

    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
                    http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz
    """
    assert len(layer_sizes) - 1 == len(dropout_rates)

    # extract the params for momentum
    mom_start = mom_params["start"]
    mom_end = mom_params["end"]
    mom_epoch_interval = mom_params["interval"]

    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    # NOTE(review): these rely on Python 2 integer division; under Python 3
    # the results would be floats and xrange below does not exist.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################

    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()    # index to a [mini]batch
    epoch = T.scalar()
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels
    learning_rate = theano.shared(np.asarray(initial_learning_rate,
        dtype=theano.config.floatX))

    rng = np.random.RandomState(random_seed)

    # construct the MLP class
    classifier = MLP_Dropout(rng=rng, input=x,
                     layer_sizes=layer_sizes,
                     dropout_rates=dropout_rates,
                     activations=activations,
                     use_bias=use_bias)

    # Build the expression for the cost function.
    cost = classifier.negative_log_likelihood(y)
    dropout_cost = classifier.dropout_negative_log_likelihood(y)

    # Compile theano function for testing.
    # NOTE(review): test_model is compiled but never called in this function.
    test_model = theano.function(inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: test_set_x[index * batch_size:(index + 1) * batch_size],
                y: test_set_y[index * batch_size:(index + 1) * batch_size]})
    #theano.printing.pydotprint(test_model, outfile="test_file.png",
    #        var_with_name_simple=True)

    # Compile theano function for validation.
    validate_model = theano.function(inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                y: valid_set_y[index * batch_size:(index + 1) * batch_size]})
    #theano.printing.pydotprint(validate_model, outfile="validate_file.png",
    #        var_with_name_simple=True)

    # Compute gradients of the model wrt parameters
    gparams = []
    for param in classifier.params:
        # Use the right cost function here to train with or without dropout.
        gparam = T.grad(dropout_cost if dropout else cost, param)
        gparams.append(gparam)

    # ... and allocate memory for momentum'd versions of the gradient
    gparams_mom = []
    for param in classifier.params:
        gparam_mom = theano.shared(np.zeros(param.get_value(borrow=True).shape,
            dtype=theano.config.floatX))
        gparams_mom.append(gparam_mom)

    # Compute momentum for the current epoch: linear interpolation from
    # mom_start to mom_end over the first mom_epoch_interval epochs, then
    # constant at mom_end.
    mom = ifelse(epoch < mom_epoch_interval,
            mom_start*(1.0 - epoch/mom_epoch_interval) + mom_end*(epoch/mom_epoch_interval),
            mom_end)

    # Update the step direction using momentum
    updates = OrderedDict()
    for gparam_mom, gparam in zip(gparams_mom, gparams):
        # Misha Denil's original version
        #updates[gparam_mom] = mom * gparam_mom + (1. - mom) * gparam

        # change the update rule to match Hinton's dropout paper
        updates[gparam_mom] = mom * gparam_mom - (1. - mom) * learning_rate * gparam

    # ... and take a step along that direction
    for param, gparam_mom in zip(classifier.params, gparams_mom):
        # Misha Denil's original version
        #stepped_param = param - learning_rate * updates[gparam_mom]

        # since we have included learning_rate in gparam_mom, we don't need it
        # here
        stepped_param = param + updates[gparam_mom]

        # This is a silly hack to constrain the norms of the rows of the weight
        # matrices.  This just checks if there are two dimensions to the
        # parameter and constrains it if so... maybe this is a bit silly but it
        # should work for now.
        if param.get_value(borrow=True).ndim == 2:
            #squared_norms = T.sum(stepped_param**2, axis=1).reshape((stepped_param.shape[0],1))
            #scale = T.clip(T.sqrt(squared_filter_length_limit / squared_norms), 0., 1.)
            #updates[param] = stepped_param * scale

            # constrain the norms of the COLUMNs of the weight, according to
            # https://github.com/BVLC/caffe/issues/109
            col_norms = T.sqrt(T.sum(T.sqr(stepped_param), axis=0))
            desired_norms = T.clip(col_norms, 0, T.sqrt(squared_filter_length_limit))
            # 1e-7 guards against division by zero for all-zero columns.
            scale = desired_norms / (1e-7 + col_norms)
            updates[param] = stepped_param * scale
        else:
            updates[param] = stepped_param

    # Compile theano function for training.  This returns the training cost and
    # updates the model parameters.
    output = dropout_cost if dropout else cost
    train_model = theano.function(inputs=[epoch, index], outputs=output,
            updates=updates,
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size:(index + 1) * batch_size]})
    #theano.printing.pydotprint(train_model, outfile="train_file.png",
    #        var_with_name_simple=True)

    # Theano function to decay the learning rate, this is separate from the
    # training function because we only want to do this once each epoch instead
    # of after each minibatch.
    decay_learning_rate = theano.function(inputs=[], outputs=learning_rate,
            updates={learning_rate: learning_rate * learning_rate_decay})

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'

    # NOTE(review): best_params, best_iter and test_score are initialised
    # here but never updated below, so the final summary always reports
    # iteration 0 and test performance 0.
    best_params = None
    best_validation_errors = np.inf
    best_iter = 0
    test_score = 0.
    epoch_counter = 0
    start_time = time.clock()

    # NOTE(review): results_file is never closed in this function.
    results_file = open(results_file_name, 'wb')

    while epoch_counter < n_epochs:
        # Train this epoch
        epoch_counter = epoch_counter + 1
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = train_model(epoch_counter, minibatch_index)

        # Compute loss on validation set
        validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
        this_validation_errors = np.sum(validation_losses)

        # Report and save progress.  The label says "test error" but the
        # value printed is the validation-set error total; " **" marks a new
        # best.
        print "epoch {}, test error {}, learning_rate={}{}".format(
                epoch_counter, this_validation_errors,
                learning_rate.get_value(borrow=True),
                " **" if this_validation_errors < best_validation_errors else "")

        best_validation_errors = min(best_validation_errors,
                this_validation_errors)
        results_file.write("{0}\n".format(this_validation_errors))
        results_file.flush()

        # Decay once per epoch (the returned value is not used afterwards).
        new_learning_rate = decay_learning_rate()

    end_time = time.clock()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_errors * 100., best_iter, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
# start logger = get_logger() logger.info('--------------- Encoder - Decoder ---------------') # load data src_vocab_size = 5000 dest_vocab_size = 5000 logger.info('loading data...') (train_x, test_x, valid_x, train_y, test_y, valid_y, mask_train_x, mask_test_x, mask_valid_x, mask_train_y, mask_test_y, mask_valid_y, src_index2word, src_word2index, dest_index2word, dest_word2index) = load_data('data/train50000.ja', 'data/train50000.en', train_size=0.8, valid_size=0.2, src_word_limit=src_vocab_size, dest_word_limit=dest_vocab_size) logger.info('loaded training source senstences: {0}, max length: {1}, vocabulary size: {2}'.format( len(train_x.get_value(borrow=True)), len(mask_train_x.get_value(borrow=True)), len(list(src_word2index)))) logger.info('loaded training target senstences: {0}, max length: {1}, vocabulary size: {2}'.format( len(train_y.get_value(borrow=True)), len(mask_train_y.get_value(borrow=True)), len(list(dest_word2index)))) # encoder encoder_vocab_size = len(src_word2index) encoder_embedding_size = 100 encoder_hidden_size = 50 encoder = Encoder(encoder_vocab_size, encoder_embedding_size, encoder_hidden_size) # decoder decoder_vocab_size = len(dest_word2index) decoder_embedding_size = 100
import numpy as np from loader import load_data, standardize from classification.mlp1 import MLP n_in = 10 n_hidden = 25 n_out = 2 dropout_prob = 0.5 learning_rate = 0.1 n_epochs = 500 batch_size = 10 weight_decay = 0.0001 K = 10 x, y = load_data('../data1.txt') roc_file = open('roc.txt', 'w') step = len(x) / K + 1 confusion_matrix = np.array([0, 0, 0, 0]) for i in np.arange(0, len(x) + 1, step): valid_set = x[i:i + step, :], y[i:i + step] train_set = np.delete(x, range(i, i + step), axis=0), np.delete(y, range(i, i + step)) datasets = standardize(train_set, valid_set) ml = MLP(n_in, n_hidden, n_out, dropout_prob, learning_rate, weight_decay) confusion_matrix += ml.test_mlp(datasets, n_epochs, batch_size, roc_file) print 'tp tn fp fn : ', confusion_matrix roc_file.close()
# --------------------------- PREPARE TRAINING & TEST DATA ---------------------------- import librosa import json from numpyEncoder import * import loader trainingData = loader.load_data() # this is used for testing with the same people testData = loader.alternative_testData() print "length of training set: ",len(trainingData) print "length of test data: ", len(testData) # ------------------------------------ NETWORK ---------------------------------------- import network eta = 1.5 NUM_EPOCHS = 30 # for CQT # INPUT_NEURONS = 84 * 44 # for mel spectogram INPUT_NEURONS = 128 * 44 HIDDEN_LAYER1 = 50 HIDDEN_LAYER2 = 50
@author: SeylomA ''' import numpy as np import loader from sklearn.cross_validation import train_test_split from sklearn.preprocessing import scale from sklearn.grid_search import GridSearchCV from sklearn.metrics import classification_report from sklearn.svm import SVC from sklearn.cross_validation import StratifiedKFold if __name__ == "__main__": # added for multiprocessor support in windows. train_X, train_Y, test = loader.load_data() n_samples = len(train_Y) ############################################################################### # # Set the parameters by cross-validation # tuned_parameters = [{'kernel': ['poly'], 'gamma': [2 ** -9, 2 ** -8.25, 2 ** -8.5, 2 ** -8.25, 2 ** -7], # 'C': [2 ** 1.8, 2 ** 2.2, 2 ** 2.4, 2 ** 2 ** 2.26, 2 ** 2 ** 2.8, 2 ** 3], 'degree':[3, 4]}] # # Set the parameters by cross-validation param_grid = [{'kernel': ['rbf'], 'gamma': [2 ** -15, 2 ** -13], 'C': [2 ** -4, 2 ** -2], 'degree':[3]}] X_train, X_test, y_train, y_test = train_test_split( train_X[:n_samples], train_Y[:n_samples], test_fraction=0.3, random_state=1)
def plot(limit_type, filename, logy=True, smooth_data=True):
    """Draw the limit plot for one signal hypothesis and save it as a PDF.

    Loads the limits with ``load_data(filename, 0.1)``, optionally smooths
    them, then overlays the 2 s.d. and 1 s.d. bands, the expected and
    observed curves and the theory curve selected by ``limit_type``. The
    exclusion returned by ``exclude`` is printed for each theory curve and
    the canvas is written to ``limits-<limit_type>.pdf``.

    Args:
        limit_type: one of "narrow", "wide" or "kk".
        filename: first argument forwarded to ``load_data``.
        logy: use a logarithmic y axis; also forwarded to ``smooth.data``.
        smooth_data: smooth the loaded data before extracting the limits.

    Raises:
        RuntimeError: if ``limit_type`` is not one of the supported types.
    """
    supported_limit_types = ("narrow", "wide", "kk")
    if limit_type not in supported_limit_types:
        raise RuntimeError("supported limit types: {0!r}".format(supported_limit_types))

    data = load_data(filename, 0.1)
    if smooth_data:
        smooth.data(data, n=40, log=logy)

    legend = ROOT.TLegend(0.5, 0.50, 0.80, 0.88)
    line_width = 3
    combo = ROOT.TMultiGraph()
    cur = get_limits(data)

    # Uncertainty bands: the 2 sigma band is added first so the 1 sigma band
    # is drawn on top of it.
    bands = {}
    for band, fill_color in (("two_sigma", ROOT.kGray + 1), ("one_sigma", ROOT.kGray)):
        graph = ROOT.TGraphAsymmErrors(
            len(cur["x"]), cur["x"], cur["expected"],
            cur["xerr"], cur["xerr"],
            cur[band + "_down"], cur[band + "_up"]
        )
        graph.SetFillColor(fill_color)
        graph.SetLineWidth(0)
        combo.Add(graph)
        bands[band] = graph

    # expected
    graph = ROOT.TGraph(len(cur["x"]), cur["x"], cur["expected"])
    graph.SetLineColor(ROOT.kBlack)
    graph.SetLineWidth(line_width)
    combo.Add(graph)
    legend.AddEntry(graph, "Expected (95% Bayesian)", "l")

    # observed
    graph = ROOT.TGraph(len(cur["observed_x"]), cur["observed_x"], cur["observed"])
    graph.SetLineColor(ROOT.kRed + 1)
    graph.SetLineWidth(line_width)
    combo.Add(graph)
    legend.AddEntry(graph, "Observed (95% Bayesian)", "l")

    # Theory: (function, width, use_old_theory) per curve. A falsy width
    # means the theory function is called without arguments.
    theories = {
        "narrow": ([theory.zprime, 1.2, False],),
        "wide": ([theory.zprime, 10.0, False],),
        "kk": ([theory.kkgluon, None, None],),
    }[limit_type]

    class Theory(object):
        """Plain container handing the theory points to ``exclude``."""

    theory_colors = [ROOT.kBlue + 1, ROOT.kMagenta + 1, ROOT.kGreen + 1]
    for index, (theory_function, theory_width, use_old_theory) in enumerate(theories, 0):
        x, y, label = theory_function(theory_width, use_old_theory) if theory_width else theory_function()

        # remove all the points below 750 GeV
        x, y = zip(*[(xi, yi) for xi, yi in zip(x, y) if not 750 > xi])

        graph = ROOT.TGraph(len(x), array("d", x), array("d", y))
        graph.SetLineColor(theory_colors[index] if index < 3 else ROOT.kBlue + 1)
        graph.SetLineWidth(3)
        graph.SetLineStyle(2)
        combo.Add(graph)
        legend.AddEntry(graph, label, "l")

        theory_data = Theory()
        theory_data.x = x
        theory_data.y = y

        expected_exclusion, observed_exclusion = exclude(cur, theory_data)
        print("Expected exclusion:", expected_exclusion)
        print("Observed exclusion:", observed_exclusion)

    legend.AddEntry(bands["one_sigma"], "#pm 1 s.d. Expected", "f")
    legend.AddEntry(bands["two_sigma"], "#pm 2 s.d. Expected", "f")

    # Draw
    cv = ROOT.TCanvas()
    style.canvas(cv)
    combo.Draw("3al")
    legend.Draw()
    if logy:
        cv.SetLogy(True)

    if limit_type == "kk":
        style.combo(
            combo,
            ytitle="Upper Limit #sigma_{g_{KK}} x B [pb]",
            maximum=1e2 if logy else None,
            minimum=1e-2 if logy else None,
        )
    else:
        style.combo(combo, maximum=1e2 if logy else None)

    style.legend(legend)
    legend.SetTextSize(0.04)

    plot_labels = labels.create(
        {"narrow": "Narrow Width Assumption", "wide": "10% Width Assumption", "kk": "KK Gluon Assumption"}.get(
            limit_type, None
        )
    )
    # Explicit loop: on Python 3 ``map`` is lazy, so ``map(Draw, ...)`` would
    # never actually draw the labels.
    for plot_label in plot_labels:
        plot_label.Draw()

    cv.Update()
    cv.SaveAs("limits-{0}.pdf".format(limit_type))
def main():
    """Plot the expected limits of the low- and high-mass inputs.

    Reads three command-line arguments: the first two are forwarded to
    ``load_data`` and the third to ``labels.create``. Draws the expected
    limit curves on a logarithmic y axis, saves the canvas as
    ``expected.png`` and then waits for input on stdin.

    Raises:
        RuntimeError: if the number of command-line arguments is not three.
    """
    if 4 != len(sys.argv):
        # The check also rejects too many arguments, so say what is expected.
        raise RuntimeError(
            "expected exactly 3 arguments, got {0}".format(len(sys.argv) - 1))

    data = load_data(*sys.argv[1:3])

    # container for low and high limits
    class Limits(object):
        pass

    limits = Limits()
    limits.low = {
        "visible": None,
        "invisible": None
    }
    limits.high = copy.deepcopy(limits.low)

    # convert YAML to dictionary
    (limits.low["visible"],
     limits.low["invisible"]) = get_limits(data.low, is_low_mass=True)
    (limits.high["visible"],
     limits.high["invisible"]) = get_limits(data.high, is_low_mass=False)

    legend = ROOT.TLegend(0.5, 0.50, 0.80, 0.80)
    legend.SetHeader("Expected limits")

    line_width = 3
    combo = ROOT.TMultiGraph()

    # expected: (limits, line color, line style or None, legend text or None)
    curves = (
        (limits.low["visible"], ROOT.kAzure - 7, None, "threshold"),
        (limits.low["invisible"], ROOT.kAzure + 2, 7, None),
        (limits.high["visible"], ROOT.kOrange + 2, None, "boosted"),
    )
    for cur, color, line_style, legend_text in curves:
        graph = ROOT.TGraph(len(cur["x"]), cur["x"], cur["expected"])
        graph.SetLineColor(color)
        graph.SetLineWidth(line_width)
        if line_style is not None:
            graph.SetLineStyle(line_style)
        combo.Add(graph)
        if legend_text is not None:
            legend.AddEntry(graph, legend_text, "l")

    # Draw
    cv = ROOT.TCanvas()
    style.canvas(cv)
    combo.Draw("3al")
    legend.Draw()
    cv.SetLogy(True)

    style.combo(combo)
    style.legend(legend)

    plot_labels = labels.create(sys.argv[3])
    # Explicit loop instead of ``map`` used only for its side effects (and
    # ``map`` is lazy on Python 3).
    for plot_label in plot_labels:
        plot_label.Draw()

    cv.Update()
    cv.SaveAs("expected.png")

    # Blocks until the user presses Enter.
    raw_input("enter")
# -*- coding: UTF-8 -*-
import numpy
from sklearn import preprocessing


def reform(datasets, frames_per_chunk=10):
    """Split every sample into fixed-length, flattened chunks.

    Each dataset is an ``(xs, ys)`` pair. Every sample ``x`` in ``xs`` is
    indexed as a 2-D array and cut along its first axis into consecutive
    chunks of ``frames_per_chunk`` rows; each chunk is flattened into one
    vector and paired with the sample's label ``y``. Rows left over after
    the last complete chunk are dropped.

    Args:
        datasets: iterable of ``(xs, ys)`` pairs.
        frames_per_chunk: number of rows per chunk (default 10).

    Returns:
        A tuple with one ``(chunks, labels)`` pair of numpy arrays per
        input dataset.
    """
    new_datasets = []
    for dataset in datasets:
        chunks = []
        chunk_labels = []
        for x, y in zip(dataset[0], dataset[1]):
            # Floor division keeps the bound an int on Python 3, where
            # ``len(x) / 10 * 10`` is a float and ``range`` rejects it.
            usable_rows = len(x) // frames_per_chunk * frames_per_chunk
            for start in range(0, usable_rows, frames_per_chunk):
                chunks.append(x[start:start + frames_per_chunk, :].flatten())
                chunk_labels.append(y)
        new_datasets.append((numpy.asarray(chunks),
                             numpy.asarray(chunk_labels)))
    return tuple(new_datasets)


if __name__ == '__main__':
    from loader import load_data
    from feature_extractor import extract_features

    datasets = extract_features(load_data()[0])
    new_datasets = reform(datasets)
    print(new_datasets[0][0][0].shape)
def evaluate_lenet5(learning_rate=0.1, n_epochs=200,
                    dataset='mnist.pkl.gz',
                    nkerns=(20, 50), batch_size=500):
    """ Demonstrates lenet on MNIST dataset

    Builds two convolution/pooling layers, a tanh hidden layer and a
    logistic-regression output layer, then trains them with ``sgdVanilla``
    updates using patience-based early stopping on the validation error.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training /testing (MNIST here)

    :type nkerns: sequence of ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_size: number of examples per minibatch
    """

    rng = numpy.random.RandomState(23455)

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 28 * 28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    # (28, 28) is the size of MNIST images.
    layer0_input = x.reshape((batch_size, 1, 28, 28))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24)
    # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, 28, 28),
        filter_shape=(nkerns[0], 1, 5, 5),
        poolsize=(2, 2)
    )

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 12, 12),
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2)
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
    # or (500, 50 * 4 * 4) = (500, 800) with the default values.
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected layer (tanh activation)
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=nkerns[1] * 4 * 4,
        n_out=500,
        activation=T.tanh
    )

    # classify the values of the fully-connected layer
    layer3 = LogisticRegressionLayer(input=layer2.output, n_in=500, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # train_model is a function that updates the model parameters by
    # SGD. Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter; the updates
    # are therefore built for all parameters at once by sgdVanilla.
    updates = sgdVanilla(params, cost, learning_rate)

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    # early-stopping parameters
    patience = 10000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            # global minibatch counter across epochs
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if iteration % 100 == 0:
                print('training @ iter = ', iteration)

            # called for its parameter updates; the returned cost is unused
            train_model(minibatch_index)

            if (iteration + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                       improvement_threshold:
                        patience = max(patience,
                                       iteration * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test it on the test set
                    test_losses = [
                        test_model(i)
                        for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print((' epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(('The code for file ' +
           os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)))
#---------------------------------------------# def get_batch(in_data, batch_size): batch_imgs = np.empty([batch_size, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS], np.float32) batch_data = np.empty([batch_size, N_IN_DATA], np.float32) batch_desi = np.empty([batch_size, N_OUT_DATA], np.float32) for i in range(batch_size): x = randint(1, len(in_data)) - 1 batch_imgs[i], batch_data[i], batch_desi[i] = in_data[x].load_data() return batch_imgs, batch_data, batch_desi #---------------------------------------------# # Running the Network #---------------------------------------------# # Load the network and the data data = ld.load_data() nn = Network() if LOAD_NETWORK: nn.load_network(LOAD_LOCATION) # Main loop for i in tqdm(range(4000, 10000000)): # Generate the batch and train img_batch, data_batch, desired_batch = get_batch(data, BATCH_SIZE) loss = nn.train(img_batch, data_batch, desired_batch, USE_WEIGHTED_LOSS) # Print the loss if i % 20 == 0: print i, loss, CHECKPOINT_END # Save the network
def plot(limit_type, low_mass, high_mass, logy=True, smooth_data=True):
    '''Draw the combined low-/high-mass limit plot and save it as a PDF.

    Loads both analyses with ``load_data``, optionally smooths them, picks
    the split point between them from the expected limits, then overlays
    the uncertainty bands, the expected and observed curves, the theory
    curve selected by ``limit_type`` and a vertical marker at the split
    point. The canvas is written to ``limits-<limit_type>.pdf``.

    Args:
        limit_type: one of "narrow", "wide" or "kk".
        low_mass: forwarded to ``load_data`` as ``low``.
        high_mass: forwarded to ``load_data`` as ``high``.
        logy: use a logarithmic y axis; also forwarded to ``smooth.data``.
        smooth_data: smooth both inputs before extracting the limits.

    Raises:
        RuntimeError: if ``limit_type`` is not one of the supported types.
    '''
    supported_limit_types = ("narrow", "wide", "kk")
    if limit_type not in supported_limit_types:
        raise RuntimeError("supported limit types: {0!r}".format(
            supported_limit_types))

    data = load_data(low=low_mass, high=high_mass, scale_high=0.1)
    if smooth_data:
        smooth.data(data.low, n=40, log=logy)
        smooth.data(data.high, n=40, log=logy)

    # Computing the splitting point based on expected values: the first
    # low-mass point whose first stored value exceeds that of the nearest
    # high-mass point at or above it.
    split_point = 0
    mass_high_min = min(data.high.keys())
    high_masses = sorted(data.high.keys())  # loop invariant, sort once
    for mass_low in sorted(data.low.keys()):
        if mass_low < mass_high_min:
            continue

        mass_high = next(
            (mass for mass in high_masses if mass >= mass_low), None)
        if mass_high is None:
            # No high-mass point at or above this mass; the same holds for
            # every later (larger) low mass. The original fell back to key 0
            # and failed with a KeyError here.
            break

        if data.low[mass_low][0] > data.high[mass_high][0]:
            print(mass_low, mass_high)
            split_point = mass_high
            break

    class Limits(object):
        '''Container for low and high limits.'''

    # Use an instance: attributes were previously set on the class itself.
    limits = Limits()
    limits.low = {
        "visible": None,
        "invisible": None
    }
    limits.high = copy.deepcopy(limits.low)
    limits.low_fix_observed = copy.deepcopy(limits.low)

    (limits.low["visible"],
     limits.low["invisible"]) = get_limits(data.low,
                                           is_low_mass=True,
                                           split_point=split_point,
                                           transform_x=gev_to_tev)
    (limits.high["visible"],
     limits.high["invisible"]) = get_limits(data.high,
                                            is_low_mass=False,
                                            split_point=split_point,
                                            transform_x=gev_to_tev)
    (limits.low_fix_observed["visible"],
     limits.low_fix_observed["invisible"]) = get_limits(
         data.low,
         is_low_mass=True,
         split_point=split_point,
         low_mass_x=True,
         transform_x=gev_to_tev)

    low = limits.low["visible"]
    high = limits.high["visible"]
    low_observed = limits.low_fix_observed["visible"]

    legend = ROOT.TLegend(0.5, 0.50, 0.80, 0.88)
    line_width = 3
    combo = ROOT.TMultiGraph()

    # Uncertainty bands: 2 sigma (low, high) first, then 1 sigma (low, high),
    # so the narrower band is drawn on top.
    bands = {}
    for band, fill_color in (("two_sigma", ROOT.kGray + 1),
                             ("one_sigma", ROOT.kGray)):
        for region, cur in (("low", low), ("high", high)):
            graph = ROOT.TGraphAsymmErrors(len(cur["x"]),
                                           cur["x"], cur["expected"],
                                           cur["xerr"], cur["xerr"],
                                           cur[band + "_down"],
                                           cur[band + "_up"])
            graph.SetFillColor(fill_color)
            graph.SetLineWidth(0)
            combo.Add(graph)
            bands[band, region] = graph

    # expected (only the low-mass curve gets a legend entry)
    for cur, legend_text in ((low, "Expected (95% CL)"), (high, None)):
        graph = ROOT.TGraph(len(cur["x"]), cur["x"], cur["expected"])
        graph.SetLineColor(ROOT.kBlack)
        graph.SetLineWidth(line_width)
        combo.Add(graph)
        if legend_text is not None:
            legend.AddEntry(graph, legend_text, "l")

    # observed (only the low-mass curve gets a legend entry)
    for cur, legend_text in ((low_observed, "Observed (95% CL)"),
                             (high, None)):
        graph = ROOT.TGraph(len(cur["observed_x"]),
                            cur["observed_x"], cur["observed"])
        graph.SetLineColor(ROOT.kRed + 1)
        graph.SetLineWidth(line_width)
        graph.SetLineStyle(2)
        combo.Add(graph)
        if legend_text is not None:
            legend.AddEntry(graph, legend_text, "l")

    # Theory: (function, width, use_old_theory) per curve. A falsy width
    # means the theory function is called without arguments.
    theories = {
        "narrow": ([theory.zprime, 1.2, False], ),
        "wide": ([theory.zprime, 10.0, False], ),
        "kk": ([theory.kkgluon, None, None], )
    }[limit_type]

    class Theory(object):
        '''Plain container for the theory points.'''

    theory_colors = [ROOT.kBlue + 1, ROOT.kMagenta + 1, ROOT.kGreen + 1]
    for index, (theory_function,
                theory_width,
                use_old_theory) in enumerate(theories, 0):
        x, y, label = (theory_function(theory_width, use_old_theory)
                       if theory_width
                       else theory_function())

        # remove all the points below 500 GeV
        x, y = zip(*[(xi, yi) for xi, yi in zip(x, y) if not 500 > xi])
        x = gev_to_tev(x)

        graph = ROOT.TGraph(len(x), array('d', x), array('d', y))
        graph.SetLineColor(theory_colors[index] if index < 3
                           else ROOT.kBlue + 1)
        graph.SetLineWidth(3)
        graph.SetLineStyle(9)
        combo.Add(graph)
        legend.AddEntry(graph, label, "l")

        theory_data = Theory()
        theory_data.x = x
        theory_data.y = y

        # Exclusion printout, disabled in the original:
        # print("low mass".capitalize())
        # expected_exclusion, observed_exclusion = exclude(limits.low["visible"], theory_data)
        # print("Expected exclusion:", expected_exclusion)
        # print("Observed exclusion:", observed_exclusion)
        # print()
        # print("high mass".capitalize())
        # expected_exclusion, observed_exclusion = exclude(limits.high["visible"], theory_data)
        # print("Expected exclusion:", expected_exclusion)
        # print("Observed exclusion:", observed_exclusion)

    legend.AddEntry(bands["one_sigma", "low"], "Expected #pm 1 s.d.", "f")
    legend.AddEntry(bands["two_sigma", "low"], "Expected #pm 2 s.d.", "f")

    # Draw
    cv = ROOT.TCanvas()
    style.canvas(cv)
    combo.Draw("3al")

    # Split point: vertical marker at the split mass. The factor 1e-3
    # matches the GeV -> TeV conversion applied to the x values.
    line = ROOT.TGraph(2)
    line.SetPoint(0, split_point * 1e-3, 1e1)
    line.SetPoint(1, split_point * 1e-3, 5e-3)
    line.SetLineColor(ROOT.kGray + 2)
    line.SetLineStyle(9)
    line.SetLineWidth(3)
    line.Draw("L")

    legend.Draw()
    if logy:
        cv.SetLogy(True)

    if limit_type == 'kk':
        style.combo(combo,
                    maximum=4e2 if logy else None,
                    minimum=1e-2 if logy else None,
                    ytitle="Upper Limit #sigma_{g_{KK}} x B [pb]")
    else:
        style.combo(combo, maximum=4e2 if logy else None)

    style.legend(legend)
    legend.SetTextSize(0.04)

    plot_labels = labels.create({
        "narrow": "Z' with 1.2% Decay Width",
        "wide": "Z' with 10% Decay Width",
        "kk": "KK Gluon"}.get(limit_type, None))
    # Explicit loop: on Python 3 ``map`` is lazy, so ``map(Draw, ...)`` would
    # never actually draw the labels.
    for plot_label in plot_labels:
        plot_label.Draw()

    cv.Update()
    cv.SaveAs("limits-{0}.pdf".format(limit_type))
mfcc_feats = mfcc(sig, rate) def diff(feats): feats_diff = numpy.zeros(feats.shape) for i in range(2, feats.shape[0]-2): feats_diff[i,:] = 2*feats[i-2,:] - feats[i-2,:] + feats[i+1,:] + 2*feats[i+2,:] return feats_diff mfcc_diff_feats = diff(mfcc_feats) mfcc_diff2_feats = diff(mfcc_diff_feats) _, energy_feat = fbank(sig, rate) log_energy_feat = numpy.log(energy_feat).reshape(energy_feat.shape[0],1) return numpy.concatenate((mfcc_feats, mfcc_diff_feats, mfcc_diff2_feats, log_energy_feat), axis=1)[2:-2] new_datasets = [] for dataset in datasets: new_datasets.append(([get_features(sample) for sample in dataset[0]], dataset[1])) return tuple(new_datasets) if __name__ == '__main__': from loader import load_data datasets, n_classes = load_data() new_datasets = extract_features(datasets) for data in new_datasets[0][0]: print data.shape
import matplotlib.pyplot as plt
from keras.layers import Dense, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.models import Sequential
from keras.optimizers import SGD

from loader import load_data
from params import img_rows, img_cols, nb_classes

# Loaded once at import time; the names are module-level globals.
(X_train, Y_train), (X_test, Y_test) = load_data()


def get_model():
    """Build and compile the convolutional classifier.

    Architecture: a 7x7 convolution (32 filters), a 5x5 convolution
    (48 filters), 2x2 max pooling, a 1024-unit dense layer and a softmax
    output with ``nb_classes`` units. The model is compiled with Nesterov
    momentum SGD and an accuracy metric.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()
    model.add(Convolution2D(32, 7, 7,
                            input_shape=(1, img_rows, img_cols),
                            activation='relu',
                            init='he_normal'))
    model.add(Convolution2D(48, 5, 5, activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(1024, activation='relu'))
    model.add(Dense(nb_classes))
    model.add(Activation('softmax'))

    sgd = SGD(momentum=0.9, nesterov=True)
    # A softmax output models one distribution over mutually exclusive
    # classes, so the matching loss is categorical cross-entropy;
    # binary cross-entropy scores every output unit independently.
    # NOTE(review): assumes the labels are one-hot encoded - confirm
    # against loader.load_data.
    model.compile(loss='categorical_crossentropy',
                  optimizer=sgd,
                  metrics=['accuracy'])
    return model
def plot(limit_type, low_mass, high_mass, logy=True):
    '''Draw the combined low-/high-mass limit plot and save it as a PDF.

    Loads both analyses with ``load_data``, extracts their limits around a
    fixed, per-type split point, then overlays the uncertainty bands, the
    expected and observed curves and the theory curve selected by
    ``limit_type``. For every type except "kk" the exclusions returned by
    ``exclude`` are printed and a vertical marker is drawn at the split
    point. The canvas is written to ``limits-<limit_type>.pdf``.

    Args:
        limit_type: one of "narrow", "wide" or "kk".
        low_mass: first argument forwarded to ``load_data``.
        high_mass: second argument forwarded to ``load_data``.
        logy: use a logarithmic y axis.

    Raises:
        RuntimeError: if ``limit_type`` is not one of the supported types.
    '''
    supported_limit_types = ("narrow", "wide", "kk")
    if limit_type not in supported_limit_types:
        raise RuntimeError("supported limit types: {0!r}".format(
            supported_limit_types))

    data = load_data(low_mass, high_mass)

    class Limits(object):
        '''Container for low and high limits.'''

    # Use an instance: attributes were previously set on the class itself.
    limits = Limits()
    limits.low = {
        "visible": None,
        "invisible": None
    }
    limits.high = copy.deepcopy(limits.low)
    limits.low_fix_observed = copy.deepcopy(limits.low)

    # convert YAML to dictionary
    split_point = {
        "narrow": 1000,
        "wide": 1100,
        "kk": 1000
    }.get(limit_type)

    (limits.low["visible"],
     limits.low["invisible"]) = get_limits(data.low,
                                           is_low_mass=True,
                                           split_point=split_point,
                                           transform_x=gev_to_tev)
    (limits.high["visible"],
     limits.high["invisible"]) = get_limits(data.high,
                                            is_low_mass=False,
                                            split_point=split_point,
                                            transform_x=gev_to_tev)
    (limits.low_fix_observed["visible"],
     limits.low_fix_observed["invisible"]) = get_limits(
         data.low,
         is_low_mass=True,
         split_point=split_point,
         low_mass_x=True,
         transform_x=gev_to_tev)

    smooth.data(limits.high["visible"], n=2)

    low = limits.low["visible"]
    high = limits.high["visible"]
    low_observed = limits.low_fix_observed["visible"]

    legend = ROOT.TLegend(0.5, 0.50, 0.80, 0.88)
    line_width = 3
    combo = ROOT.TMultiGraph()

    # Uncertainty bands: 2 sigma (low, high) first, then 1 sigma (low, high),
    # so the narrower band is drawn on top.
    bands = {}
    for band, fill_color in (("two_sigma", ROOT.kGray + 1),
                             ("one_sigma", ROOT.kGray)):
        for region, cur in (("low", low), ("high", high)):
            graph = ROOT.TGraphAsymmErrors(len(cur["x"]),
                                           cur["x"], cur["expected"],
                                           cur["xerr"], cur["xerr"],
                                           cur[band + "_down"],
                                           cur[band + "_up"])
            graph.SetFillColor(fill_color)
            graph.SetLineWidth(0)
            combo.Add(graph)
            bands[band, region] = graph

    # expected (only the low-mass curve gets a legend entry)
    for cur, legend_text in ((low, "Expected (95% CL)"), (high, None)):
        graph = ROOT.TGraph(len(cur["x"]), cur["x"], cur["expected"])
        graph.SetLineColor(ROOT.kBlack)
        graph.SetLineWidth(line_width)
        combo.Add(graph)
        if legend_text is not None:
            legend.AddEntry(graph, legend_text, "l")

    # observed (only the low-mass curve gets a legend entry)
    for cur, legend_text in ((low_observed, "Observed (95% CL)"),
                             (high, None)):
        graph = ROOT.TGraph(len(cur["observed_x"]),
                            cur["observed_x"], cur["observed"])
        graph.SetLineColor(ROOT.kRed + 1)
        graph.SetLineWidth(line_width)
        combo.Add(graph)
        if legend_text is not None:
            legend.AddEntry(graph, legend_text, "l")

    # Theory: (function, width, use_old_theory) per curve. A falsy width
    # means the theory function is called without arguments.
    theories = {
        "narrow": ([theory.zprime, 1.2, False], ),
        "wide": ([theory.zprime, 10.0, False], ),
        "kk": ([theory.kk, None, None], )
    }[limit_type]

    class Theory(object):
        '''Plain container handing the theory points to ``exclude``.'''

    theory_colors = [ROOT.kBlue + 1, ROOT.kMagenta + 1, ROOT.kGreen + 1]
    for index, (theory_function,
                theory_width,
                use_old_theory) in enumerate(theories, 0):
        x, y, label = (theory_function(theory_width, use_old_theory)
                       if theory_width
                       else theory_function())

        # remove all the points below 500 GeV
        x, y = zip(*[(xi, yi) for xi, yi in zip(x, y) if not 500 > xi])
        x = gev_to_tev(x)

        graph = ROOT.TGraph(len(x), array('d', x), array('d', y))
        graph.SetLineColor(theory_colors[index] if index < 3
                           else ROOT.kBlue + 1)
        graph.SetLineWidth(3)
        graph.SetLineStyle(2)
        combo.Add(graph)
        legend.AddEntry(graph, label, "l")

        theory_data = Theory()
        theory_data.x = x
        theory_data.y = y

        # NOTE(review): the original indentation was lost; the whole
        # exclusion printout is assumed to be skipped for "kk" - confirm.
        if limit_type != 'kk':
            print("low mass".capitalize())
            expected_exclusion, observed_exclusion = exclude(
                limits.low["visible"], theory_data)
            print("Expected exclusion:", expected_exclusion)
            print("Observed exclusion:", observed_exclusion)
            print()

            print("high mass".capitalize())
            expected_exclusion, observed_exclusion = exclude(
                limits.high["visible"], theory_data)
            print("Expected exclusion:", expected_exclusion)
            print("Observed exclusion:", observed_exclusion)

    legend.AddEntry(bands["one_sigma", "low"], "#pm 1 #sigma Expected", "f")
    legend.AddEntry(bands["two_sigma", "low"], "#pm 2 #sigma Expected", "f")

    # Draw
    cv = ROOT.TCanvas()
    style.canvas(cv)
    combo.Draw("3al")

    # Split point: vertical marker at the split mass. The factor 1e-3
    # matches the GeV -> TeV conversion applied to the x values.
    if limit_type != 'kk':
        line = ROOT.TGraph(2)
        line.SetPoint(0, split_point * 1e-3, 1e1)
        line.SetPoint(1, split_point * 1e-3, 3e-2)
        line.SetLineColor(ROOT.kGray + 2)
        line.SetLineStyle(2)
        line.SetLineWidth(3)
        line.Draw("L")

    legend.Draw()
    if logy:
        cv.SetLogy(True)

    style.combo(combo, maximum=1e2 if logy else None)
    combo.GetXaxis().SetRangeUser(0, 4)
    style.legend(legend)
    legend.SetTextSize(0.04)

    plot_labels = labels.create({
        "narrow": "Z' with 1% Decay Width",
        "wide": "Z' with 10% Decay Width",
        "kk": "KK Gluon Assumption"}.get(limit_type, None))
    # Explicit loop: on Python 3 ``map`` is lazy, so ``map(Draw, ...)`` would
    # never actually draw the labels.
    for plot_label in plot_labels:
        plot_label.Draw()

    cv.Update()
    cv.SaveAs("limits-{0}.pdf".format(limit_type))
def load_data(filename):
    """Return whatever ``ld.load_data`` produces for ``filename``.

    Thin pass-through: the argument is forwarded unchanged and the result
    is returned as-is.
    """
    loaded = ld.load_data(filename)
    return loaded
#!/usr/bin/env python ''' Created by Samvel Khalatyan, Jun 03, 2012 Copyright 2012, All rights reserved ''' from __future__ import division import copy import ROOT from loader import load_data,get_limits data = load_data() class Limits(object): pass limits = Limits limits.low = { "visible": None, "invisible": None } limits.high = copy.deepcopy(limits.low) (limits.low["visible"], limits.low["invisible"]) = get_limits(data.low, is_low_mass=True) (limits.high["visible"],
import falcon
import default_handler
from loader import load_data
import logging
import sys

# Send every log record (DEBUG and above) of the root logger to stdout.
root = logging.getLogger()
root.setLevel(logging.DEBUG)

ch = logging.StreamHandler(sys.stdout)
ch.setLevel(logging.DEBUG)
ch.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
root.addHandler(ch)

# Mapping of URL -> expectations; one route is registered per entry.
MAPPING = load_data()

# ``application`` is the conventional WSGI entry-point name.
api = application = falcon.API()

for url, expectations in MAPPING.items():
    print(url)
    # Lazy %-style arguments: the message is only formatted when emitted.
    logging.info('Route added -->> %s', url)
    api.add_route('%s' % url, default_handler.DefaultHandler(expectations))

if __name__ == '__main__':
    # ``simple_server`` was used here without ever being imported, so
    # running the script directly failed with a NameError.
    from wsgiref import simple_server

    httpd = simple_server.make_server('127.0.0.1', 8000, application)
    httpd.serve_forever()
features_listB = ['to_messages', 'from_poi_to_this_person', 'from_messages', 'from_this_person_to_poi', 'shared_receipt_with_poi'] features_listC = ['maildata', 'to_ratio', 'from_ratio', 'comm', 'comm_sum', 'comm_max', 'comm_min', 'comm_ratio', 'comm2', 'from_ratio_log'] features_label = ['poi'] features_list_full = features_label + features_listA + features_listB + features_listC features_list = features_list_full ### Load modified dataset as my_dataset my_dataset = load_data() print 'initial features count: ', len(features_listA + features_listB) print 'total datapoints count: ', len(my_dataset) ### Extract the labels and selected features from my dataset data = featureFormat(my_dataset, features_list, sort_keys = True) labels, features = targetFeatureSplit(data) ### Task 3.2: Evaluate features print '\nInitial features:' print features_list
# run_simulation(self, save_to_dot=True, save_to_json=True)
#
# Drives the whole simulation for self.NUMBER_OF_GENERATIONS generations.
#
# Setup:
#   * creates an empty Population, loads the seed group with
#     seed.load_group() and the tables with loader.load_data()
#     (table_data.life_table and table_data.dispersal_table are used);
#   * adds NUMBER_OF_SEED_GROUPS + 1 deep copies of the seed group and then
#     deletes groups[0] (the author's stated temporary workaround for the
#     0th group loading incorrectly).
#
# Per generation:
#   * resets the birth/death counters and the per-generation analytics;
#   * deep-copies the current population into next_generation_population;
#   * for the agents of each group: promotes the agent in the new
#     generation, then runs check_for_birth, check_for_death,
#     check_for_dispersal, friendships.check_for_friendships and the
#     experiment-specific per-agent hook, and accumulates edge/age/sex
#     counts;
#   * calls the experiment-specific per-generation hook, makes the new
#     population the current one, and appends the generation's statistics to
#     the record lists;
#   * when save_to_dot / save_to_json are truthy, passes the population's
#     dot / json string and the generation index to save_data_to_dot /
#     save_data_to_json.
#
# Finally the collected lists are passed to self.save_data() and the birth
# and death counter values are printed. No value is returned.
#
# NOTE(review): the original line breaks and indentation were lost, so the
#   exact nesting (which statements belong to the agent, group or generation
#   loop, and what sits under the female_set check) cannot be confirmed from
#   this text - verify against the upstream file before editing.
# NOTE(review): `chance_of_birth` is assigned but not read again in this
#   method.
# NOTE(review): only adult_males_per_group == 0 is guarded; the divisions by
#   number_of_groups and this_population_record have no zero check.
def run_simulation(self, save_to_dot=True, save_to_json=True): #import Seed and lifetable data this_generation_population = Population() next_generation_population = None seed_group = seed.load_group(this_generation_population) table_data = loader.load_data() lifetable = table_data.life_table dispersal_table =\ table_data.dispersal_table random_module = RandomModule() #create analytics lists age_record_list = [] population_record_list = [] male_population_record_list = [] female_population_record_list = [] real_birth_rate_list = [] real_death_rate_list = [] edges_per_agent_list = [] adult_males_list = [] adult_females_list = [] adult_females_per_males_list = [] total_agent_relationships_list = [] group_composition_list = [] death_counter = Counter() #used to make sure the correct number #of deaths occur birth_counter = Counter() #used to make sure the correct number #of births take place #assign all_groups by creating several copies of the #seed generation for i in range(0, self.NUMBER_OF_SEED_GROUPS + 1): this_generation_population.add_group(copy.deepcopy(seed_group)) """ I was having a strange error where the 0th group was loaded incorrectly. This is a temporary fix """ del this_generation_population.groups[0] for i in range (0, self.NUMBER_OF_GENERATIONS): self.per_generation_printout(i) #analytics this_age_record = [] this_population_record = 0 this_male_population_record = 0 this_female_population_record = 0 this_edges_per_agent = 0 this_generation_adult_males = 0 this_generation_adult_females = 0 this_generation_group_composition_list = [] #reset counters death_counter.reset() birth_counter.reset() #make the next gen population a copy of this gen's pop this_generation_population.generation = i next_generation_population =\ copy.deepcopy(this_generation_population) #run the simulation for each sub_group. 
for j in range(0, len(this_generation_population.groups)): this_generation = this_generation_population.groups[j] new_generation = next_generation_population.groups[j] females_to_male =\ this_generation.get_females_to_male() for agent_index in this_generation.whole_set: #print str(agent_index) + ", " + str(len(this_generation.agent_array)) this_agent =\ this_generation.agent_dict[agent_index] new_agent =\ new_generation.agent_dict[agent_index] #increment age new_generation.promote_agent(new_agent) #check birth_rate if this_agent.index in this_generation.female_set: chance_of_birth =\ lifetable.chance_of_birth(females_to_male, this_agent.age) #check for birth self.check_for_birth(this_generation, new_generation, this_agent, new_agent, females_to_male, agent_index, lifetable, random_module, birth_counter, male_population_record_list) #check for death self.check_for_death(lifetable, females_to_male, this_agent, new_agent, new_generation, random_module, death_counter) #check for dispersal self.check_for_dispersal(dispersal_table, females_to_male, this_agent, new_agent, this_generation, new_generation, this_generation_population, next_generation_population, random_module) #check for friendships friendships.check_for_friendships(this_agent, new_agent, this_generation, new_generation, random_module) #unique changes self.conduct_changes_unique_to_experiment_at_agent( this_generation_population, next_generation_population, this_generation, new_generation, this_agent, new_agent, females_to_male, lifetable, random_module, table_data ) #analytics this_edges_per_agent += this_agent.edges() this_age_record.append(this_agent.age) this_population_record += 1 if (this_agent.index in this_generation.male_set): this_male_population_record += 1 elif (this_agent.index in this_generation.female_set): this_female_population_record += 1 this_generation_adult_males +=\ len(this_generation.male_set) this_generation_adult_females +=\ len(this_generation.female_set) 
this_generation_group_composition_list.append( len(this_generation.whole_set) ) self.conduct_changes_unique_to_experiment_at_gen( this_generation_population, next_generation_population, i, self.NUMBER_OF_GENERATIONS, table_data) #set the old gen to the new one this_generation_population = next_generation_population group_composition_list.append(this_generation_group_composition_list) number_of_groups = len(this_generation_population.groups) adult_males_per_group =\ float(this_generation_adult_males)/number_of_groups adult_females_per_group =\ float(this_generation_adult_females)/number_of_groups adult_males_list.append(adult_males_per_group) adult_females_list.append(adult_females_per_group) #handle div by 0 errors in calculating #females per male if (adult_males_per_group == 0): adult_females_per_males_list.append( adult_females_per_group/1 ) elif (adult_females_per_group == 0): adult_females_per_males_list.append(0) else: adult_females_per_males_list.append( float(adult_females_per_group)/float(adult_males_per_group) ) if (save_to_dot): self.save_data_to_dot(this_generation_population.get_dot_string(), i) if (save_to_json): self.save_data_to_json(this_generation_population.get_json_string(), i) average_edges_per_agent =\ float(this_edges_per_agent)/this_population_record edges_per_agent_list.append(average_edges_per_agent) real_death_rate_list.append( float(death_counter.getCount())/this_population_record) real_birth_rate_list.append( float(birth_counter.getCount())/this_population_record) age_record_list.append(this_age_record) male_population_record_list.append(this_male_population_record) female_population_record_list.append( this_female_population_record) population_record_list.append(this_population_record) total_agent_relationships_list = ( this_generation_population.\ get_population_relationship_stats()) self.save_data(population_record_list, male_population_record_list, female_population_record_list, age_record_list, real_birth_rate_list, 
real_death_rate_list, edges_per_agent_list, adult_females_per_males_list, group_composition_list, total_agent_relationships_list) print (birth_counter.getCount()) print (death_counter.getCount())
Run on GPU: THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python mnist_cnn.py Get to 99.25% test accuracy after 12 epochs (there is still a lot of margin for parameter tuning). 16 seconds per epoch on a GRID K520 GPU. ''' batch_size = 128 nb_classes = 10 nb_epoch = 12 img_rows, img_cols = 28, 28 # input image dimensions nb_filters = 32 # number of convolutional filters to use nb_pool = 2 # size of pooling area for max pooling nb_conv = 3 #3 # convolution kernel size # the data, shuffled and split between train and test sets (X_train, y_train), (X_test, y_test) = loader.load_data("mnist.pkl") checkpoint = tm.time() X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols) X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols) X_train = X_train.astype("float32") X_test = X_test.astype("float32") X_train /= 255 X_test /= 255 print('X_train shape:', X_train.shape) print(X_train.shape[0], 'train samples') print(X_test.shape[0], 'test samples') X_real_test = pd.read_csv('test.csv').values X_real_test = X_real_test.reshape(X_real_test.shape[0], 1, img_rows, img_cols) X_real_test = X_real_test.astype("float32")
def setUp(self):
    """Store the result of ``loader.load_data()`` on ``self.data``."""
    fixture = loader.load_data()
    self.data = fixture
# Cross-validation of the MLP model on '../data2.txt': the samples are cut
# into consecutive folds, the model is trained on all folds but one, and the
# values returned by MLP.test_mlp (accumulated as `mse`) are averaged.
import numpy as np

from loader import load_data
from regression.mlp2 import MLP

# Network shape and training settings handed to MLP / test_mlp.
n_in = 22
n_hidden = 40
n_out = 1
dropout_prob = 0.5
learning_rate = 0.1
n_epochs = 500
batch_size = 10
weight_decay = 0.00001
K = 10  # requested number of folds

x, y = load_data('../data2.txt')

n_samples = len(x)
# Floor division keeps the fold size an integer on Python 2 and Python 3.
step = n_samples // K + 1

mse = 0.0
n_folds = 0
# Starting points stop before n_samples, so no empty trailing fold is ever
# evaluated (the previous upper bound of len(x) + 1 produced one whenever
# len(x) was a multiple of step).
for start in range(0, n_samples, step):
    # Clip the last fold to the data: np.delete raises IndexError for
    # out-of-bounds indices on current NumPy releases.
    stop = min(start + step, n_samples)
    held_out = np.arange(start, stop)
    valid_set = x[start:stop, :], y[start:stop]
    train_set = np.delete(x, held_out, axis=0), np.delete(y, held_out)
    datasets = {'train': train_set, 'valid': valid_set}
    ml = MLP(n_in, n_hidden, n_out, dropout_prob, learning_rate, weight_decay)
    mse += ml.test_mlp(datasets, n_epochs, batch_size)
    n_folds += 1

if not n_folds:
    raise ValueError('no samples loaded; cannot cross-validate')

# Average over the folds actually evaluated; this can be fewer than K because
# the fold size is rounded up. The %-formatted call prints the same text under
# Python 2 and Python 3.
print('mse: %s' % (mse / n_folds))