def go(dset=None, path=KINECT_PATH, threed=False, skip=1):
    if dset is None:
        dataset.load_random_dataset(path)
    else:
        dataset.load_dataset(dset)
    for rgbs, depths in dataset.iter(skip=skip):
        once(rgbs, depths, threed=threed)
def load_test_dataset():
    """
    Load and return the testing dataset and labels.

    Returns: test_x, test_y (all numpy arrays)
    """
    test_x = load_dataset(TEST_X)
    test_y = load_dataset(TEST_Y)
    return test_x, test_y
def go(dset=None):
    if dset is None:
        dataset.load_random_dataset()
    else:
        dataset.load_dataset(dset)
    while True:
        dataset.advance()
        once()
def load_all_datasets():
    """
    Load and return the training and testing datasets and labels.

    Returns: train_x, train_y, test_x, test_y (all numpy arrays)
    """
    train_x = load_dataset(TRAIN_X)
    train_y = load_dataset(TRAIN_Y)
    test_x = load_dataset(TEST_X)
    test_y = load_dataset(TEST_Y)
    return train_x, train_y, test_x, test_y
def main(product):
    TRAIN_FILE = "../data/ABSA-15_{}_Train_Data.xml".format(product)
    TEST_FILE = "../data/ABSA15_{}_Test.xml".format(product)

    # load data set
    training_reviews = load_dataset(TRAIN_FILE)
    testing_reviews = load_dataset(TEST_FILE)

    # build vocab
    vocab = build_vocab(training_reviews, TOPN=1000)
    vocab_index = list2dict(vocab)

    cate_index = get_all_categories(training_reviews)
    cates = dict2list(cate_index)
    n_cates = len(cates)

    train_X = get_X(training_reviews, vocab_index)
    test_X = get_X(testing_reviews, vocab_index)
    train_labels = get_labels(training_reviews, cate_index)
    test_labels = get_labels(testing_reviews, cate_index)

    # transform to a mono-label problem
    M = len(train_X)
    X = []
    Y = []
    for i in range(M):
        if not train_labels[i]:
            Y.append(n_cates)  # category index from 0 to n_cates-1, n_cates is for None-label
            X.append(train_X[i])
        else:
            for y in train_labels[i]:
                Y.append(y)
                X.append(list(train_X[i]))

    clf_model = MultinomialNB()
    clf_model.fit(X, np.array(Y))

    # predict
    output = predict(test_X, clf_model, threshold=0.2)

    # evaluation
    p, r, f = microF1(output, test_labels)

    # output
    out_dir = "../data/bow_nb/"
    out_file = out_dir + "laptop.txt"
    with open(out_file, 'w') as out:
        out.write("Precision:\t{}\nRecall:\t{}\nF1:\t{}\n".format(p, r, f))
    print("{}\n{}\n{}".format(p, r, f))
def main():
    # load data set
    training_reviews = load_dataset(TRAIN_FILE)
    testing_reviews = load_dataset(TEST_FILE)

    # load doc2vec model
    doc2vec_model = Doc2Vec.load(DOC2VEC_MODEL)

    cate_index = get_all_categories(training_reviews)
    cates = dict2list(cate_index)
    n_cates = len(cates)

    train_X = get_X(training_reviews, doc2vec_model)
    test_X = get_X(testing_reviews, doc2vec_model)
    train_labels = get_labels(training_reviews, cate_index)
    test_labels = get_labels(testing_reviews, cate_index)

    labelwise_acc = []
    labelwise_output = []
    for cate in range(n_cates):
        # train a binary model
        train_Y = get_Y(train_labels, cate)
        prob = svm_problem(train_Y, train_X)
        param = svm_parameter("-s 0 -t 2 -b 1")
        m = svm_train(prob, param)

        # test
        test_Y = get_Y(test_labels, cate)
        p_label, p_acc, p_val = svm_predict(test_Y, test_X, m, '-b 1')
        labelwise_acc.append(p_acc)
        labelwise_output.append(p_label)

    # evaluation
    p, r, f = microF1(labelwise_output, test_labels)

    # output
    out_dir = "../data/use_doc2vec/"
    out_file = out_dir + "laptop.txt"
    labelwise_acc = [(cates[i], labelwise_acc[i][0]) for i in range(n_cates)]
    labelwise_acc = sorted(labelwise_acc, key=lambda x: x[1])
    with open(out_file, 'w') as out:
        out.write("Precision:\t{}\nRecall:\t{}\nF1:\t{}\n".format(p, r, f))
        print("{}\n{}\n{}".format(p, r, f))
        for cate_i in range(n_cates):
            out.write("{}:\t{}\n".format(labelwise_acc[cate_i][0], labelwise_acc[cate_i][1]))
def train_regression_model():
    num_batches = 1848
    total_iterations = 90000
    batch_size = 64
    num_epochs = ceil(total_iterations / num_batches)
    initial_epoch = 0

    regression_model_file = 'model_%02d.hdf5' % initial_epoch
    regression_model = None
    if os.path.exists(regression_model_file):
        print('Loading from saved file.')
        regression_model = model.get_regression_model(regression_model_file)
    else:
        print('Start training from scratch.')
        regression_model = model.create_regression_model()
    regression_model.summary()

    progbar = ProgbarLogger('steps')
    checkpoint = ModelCheckpoint('model_{epoch:02d}.hdf5', verbose=1, monitor='loss')
    terminate = TerminateOnNaN()
    callbacks = [checkpoint, progbar, terminate]

    regression_model.fit_generator(
        generator=dataset.load_dataset(training_dir),
        steps_per_epoch=num_batches,
        epochs=num_epochs,
        callbacks=callbacks,
        initial_epoch=initial_epoch,
        verbose=1)
def prepare_data(dataset, pca_n):
    global n_classes, X, y, pp, X_tr, X_inv
    n_classes = len(dataset)
    X, y = load_dataset(dataset)
    pp = Preprocess(pca_n)
    X_tr = pp.fit_transform(X)
    X_inv = pp.inverse_transform(X_tr)
def get_df_from_file_path(file_path, net="NET1"):
    REAL_BEHAVIORAL_DATA = config.Config.get("REAL_BEHAVIORAL_DATA")
    CACHED_BEHAVIORAL_DATA = config.Config.get("CACHED_BEHAVIORAL_DATA")
    if REAL_BEHAVIORAL_DATA:
        cache_file_path1 = file_path.replace(".mat", "net1.csv")
        cache_file_path2 = file_path.replace(".mat", "net3.csv")
        cache_file_path = {
            "NET1": cache_file_path1,
            "NET3": cache_file_path2
        }[net]
        if CACHED_BEHAVIORAL_DATA and os.path.isfile(cache_file_path):
            print("loading cached file....")
            df = pd.read_csv(cache_file_path)
        else:
            print("parsing real data file")
            df, df2 = behavior_parse.parse_matlab_file(file_path)
            print("storing to cache...")
            df.to_csv(cache_file_path1)
            df2.to_csv(cache_file_path2)
            df = df if cache_file_path == cache_file_path1 else df2
    else:
        df = dataset.load_dataset()
    if "Unnamed: 0" in df.columns:
        df.drop(columns=["Unnamed: 0"], inplace=True)
    return df
def test(dataset, batch_size, filters, context):
    datasets = {
        "facades": True,
        "cityscapes": False,
        "maps": False,
        "edges2shoes": False,
        "edges2handbags": False
    }

    mx.random.seed(int(time.time()))

    print("Loading dataset...", flush=True)
    validating_set = load_dataset(dataset, "val", batch_size, is_reversed=datasets[dataset])

    net_g = UnetGenerator(3, filters)
    net_g.load_parameters("model/{}.generator.params".format(dataset), ctx=context)

    print("Testing...", flush=True)
    for batch in validating_set:
        real_in = batch.data[0].as_in_context(context)
        real_out = batch.data[1].as_in_context(context)
        fake_out = net_g(real_in)
        for i in range(batch_size):
            plt.subplot(3, batch_size, i + 1)
            visualize(real_in[i])
            plt.subplot(3, batch_size, i + batch_size + 1)
            visualize(real_out[i])
            plt.subplot(3, batch_size, i + batch_size * 2 + 1)
            visualize(fake_out[i])
        plt.show()
def main(_):
    if not os.path.exists(FLAGS.train_dir):
        os.makedirs(FLAGS.train_dir)
    if not os.path.exists(FLAGS.log_dir):
        os.makedirs(FLAGS.log_dir)

    file_handler = logging.FileHandler(os.path.join(FLAGS.log_dir, "log.txt"))
    logging.getLogger().addHandler(file_handler)

    dataset = load_dataset(FLAGS.small_train_set)
    denoise = get_model(FLAGS.model)(FLAGS)

    train_dir = os.path.join(FLAGS.train_dir, denoise.model_name)
    if not os.path.exists(train_dir):
        os.makedirs(train_dir)

    print(vars(FLAGS))
    with open(os.path.join(FLAGS.log_dir, "flags.json"), 'w') as fout:
        json.dump(FLAGS.__flags, fout)

    with tf.Session() as sess:
        _, epoch = initialize_model(sess, denoise, train_dir)
        denoise.train(sess, dataset, epoch)
def main(train_set_filename, test_set_filename, config_filename):
    train_set = load_dataset(train_set_filename)
    test_set = load_dataset(test_set_filename)
    config = load_config(config_filename)

    if config.model == Model.ID3:
        tree = decisiontree.id3(train_set, config.max_depth)
        print(decisiontree.show_decision_tree(tree))
        predictor = assessment.make_predictor(tree, decisiontree.predict)
        print_whole_assessment(predictor, test_set)
    elif config.model == Model.RF:
        forest = randomforest.train(train_set, config.num_trees, config.max_depth,
                                    config.example_ratio, config.feature_ratio)
        randomforest.print_forest(forest)
        predictor = assessment.make_predictor(forest, randomforest.predict)
        print_whole_assessment(predictor, test_set)
def generate(network_pkl, out_dir):
    if os.path.exists(out_dir):
        raise ValueError('{} already exists'.format(out_dir))

    misc.init_output_logging()
    np.random.seed(config.random_seed)
    tfutil.init_tf(config.tf_config)

    with tf.device('/gpu:0'):
        G, D, Gs = misc.load_pkl(network_pkl)
    training_set = dataset.load_dataset(data_dir=config.data_dir, verbose=True, **config.dataset)

    # grid_size, grid_reals, grid_labels, grid_latents = train.setup_snapshot_image_grid(G, training_set, **config.grid)
    number_of_images = 1000
    grid_labels = np.zeros([number_of_images, training_set.label_size],
                           dtype=training_set.label_dtype)
    grid_latents = misc.random_latents(number_of_images, G)

    total_kimg = config.train.total_kimg
    sched = train.TrainingSchedule(total_kimg * 1000, training_set, **config.sched)
    grid_fakes = Gs.run(grid_latents, grid_labels,
                        minibatch_size=sched.minibatch // config.num_gpus)

    os.makedirs(out_dir)
    # print(np.min(grid_fakes), np.mean(grid_fakes), np.max(grid_fakes))
    # misc.save_image_grid(grid_fakes, 'fakes.png', drange=[-1,1], grid_size=grid_size)
    for i, img in enumerate(grid_fakes):
        img = img.transpose((1, 2, 0))
        img = np.clip(img, -1, 1)
        img = (1 + img) / 2
        img = skimage.img_as_ubyte(img)
        imageio.imwrite(os.path.join(out_dir, '{}.png'.format(i)), img[..., :3])
        if img.shape[-1] > 3:
            np.save(os.path.join(out_dir, '{}.npy'.format(i)), img)
def main(verbose):
    dataset = load_dataset(
        glob('../data/trump_tweet_data_archive/condensed_*.json.zip'), verbose)
    corpus, sequences, next_chars, c2i, i2c, nc = seq_data(
        dataset, SEQ_LEN, SEQ_STEP, verbose)

    if verbose:
        print(f'corpus length: {len(corpus)}')
        print(f'num characters: {nc}')
        print(f'number of sequences: {len(sequences)}')

    # The data is shuffled so the validation data isn't simply the latest 20% of tweets
    X, y = vec_data(sequences, next_chars, SEQ_LEN, nc, c2i, verbose)

    # Split off the last 20% as validation data for pretty graphs
    n = len(X)
    num_val = int(PERCENT_VALIDATION * n)
    X_val = X[n - num_val:]
    y_val = y[n - num_val:]
    X_train = X[:n - num_val]
    y_train = y[:n - num_val]

    if verbose:
        print(f'Number validation samples: {num_val}')

    model = build_model(SEQ_LEN, nc, verbose)
    history = train_model(model, X_train, y_train, X_val, y_val, verbose)
    plot_model_loss(BASENAME, history, verbose)

    # Save the trained model so we don't have to wait 25 hours to generate another 10 tweet sample
    save_model(model, BASENAME, verbose)

    # Generate sample tweets using 10 random seeds from the corpus.
    generate(BASENAME, model, corpus, c2i, i2c, nc, 10, verbose)
def main():
    train_data, test_data, user_bundle_data, user_item_data, bundle_item_data, \
        item_num, user_num, bundle_num, user_bundle_mat = dataset.load_dataset()

    train_dataset = data_prep.CreateData(train_data, bundle_num, user_bundle_mat,
                                         args.train_neg_num, True)
    test_dataset = data_prep.CreateData(test_data, bundle_num, user_bundle_mat,
                                        0, False)

    train_loader = data.DataLoader(train_dataset, batch_size=args.batch_size,
                                   shuffle=True, num_workers=4)
    test_loader = data.DataLoader(test_dataset, batch_size=99 + 1,
                                  shuffle=False, num_workers=0)

    ub_graph, ui_graph, bi_graph = get_graph(train_data, user_item_data,
                                             bundle_item_data, item_num,
                                             user_num, bundle_num)
    graph = [ub_graph, ui_graph, bi_graph]

    # print(args)
    model = IHBR(args, item_num, user_num, bundle_num, graph, device).to(device)
    op = optim.Adam(model.parameters(), lr=3e-4, weight_decay=1e-7)
    loss_func = nn.BCEWithLogitsLoss()

    loss = train(model, args.epochs, train_loader, op, device, loss_func, test_loader)
def questao21():
    dset = load_dataset('dataset1.csv')
    xo = dset.T[1].astype(float)  # second column
    x = dset.T[1].astype(float)   # second column
    yo = dset.T[2].astype(float)  # third column
    y = dset.T[2].astype(float)   # third column

    # z-score normalization helped visualization and is required for clustering
    #x = [z_score(x, xi) for xi in x]
    #y = [z_score(y, yi) for yi in y]
    #centros_iniciais = [(z_score(xo, 1), z_score(yo, 2)), (z_score(xo, 4), z_score(yo, 2))]
    centros_iniciais = [(1, 2), (4, 2)]

    pontos = zip(x, y)
    clusters, iteracoes = kmeans(pontos, 2, centros_iniciais=centros_iniciais)
    cluster1 = clusters[0].pontos
    cluster2 = clusters[1].pontos

    plt.plot([xi[0] for xi in cluster1], [yi[1] for yi in cluster1], 'ro')
    plt.plot([clusters[0].centroide[0]], [clusters[0].centroide[1]], 'r*')
    plt.plot([xi[0] for xi in cluster2], [yi[1] for yi in cluster2], 'go')
    plt.plot([clusters[1].centroide[0]], [clusters[1].centroide[1]], 'g*')
    plt.savefig('grupo1.png')

    print "Novos centróides:", clusters[0].centroide, " e ", clusters[1].centroide
def plot_goal_reached_distribution(runs_dir, img_dir, filename):
    """
    :param runs_dir:
    :param img_dir:
    :param filename:
    """
    dataset_states = load_dataset(runs_dir)

    time_steps = np.arange(dataset_states.step.max() + 1)

    states_subset = dataset_states[["step", "goal_reached"]]
    last_steps = states_subset.groupby("run").map(lambda x: x.isel(sample=[-1]))

    false_label, false_samples = False, last_steps.where(last_steps.goal_reached == False, drop=True)
    true_label, true_samples = True, last_steps.where(last_steps.goal_reached == True, drop=True)

    plt.figure(figsize=(7.8, 4.8), constrained_layout=True)
    plt.hist([true_samples.step, false_samples.step], bins=time_steps,
             label=[true_label, false_label], stacked=True, alpha=0.9)
    plt.ylim(0, plt.ylim()[1] + 1)
    plt.legend()

    plt.xlim(0, dataset_states.step.max() + 1)
    plt.xlabel('timestep', fontsize=11)
    plt.ylabel('runs', fontsize=11)

    save_visualisation(filename, img_dir)
def plot_sensors(goal_object, runs_dir, video_dir, filename, run_id=0):
    """
    :param goal_object:
    :param runs_dir:
    :param video_dir:
    :param filename:
    :param run_id:
    """
    dataset_states = load_dataset(runs_dir)

    run_states = dataset_states.where(dataset_states.run == run_id, drop=True)
    marxbot = viz.DatasetSource(run_states)

    # Create the visualizations
    env = viz.FuncAnimationEnv([
        viz.GridLayout((1, 3), [
            viz.TrajectoryViz(marxbot, goal_object=goal_object),
            viz.LaserScannerViz(marxbot),
            viz.ControlSignalsViz(marxbot)
        ], suptitle='Run %d' % run_id)
    ], sources=[marxbot])
    env.show(figsize=(14, 4))

    video_path = os.path.join(video_dir, '%s-%d.mp4' % (filename, run_id))
    env.save(video_path, dpi=300)
def load_processed_dataset(diags):
    from keras.utils import np_utils

    xy = load_dataset()
    X = xy["x"]
    annotation = load_annotation()
    X = np.concatenate((X, annotation), axis=2)
    Y = xy["y"]

    Y_new = np.zeros(Y.shape[0])
    for i in range(Y.shape[0]):
        for j in diags:
            if Y[i, j] == 1:
                Y_new[i] = 1
    Y = np_utils.to_categorical(Y_new, 2)

    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25, random_state=42)
    X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size=0.25, random_state=42)
    return X_train, X_val, X_test, Y_train, Y_val, Y_test
def run_sswe_u(window_size, training_file, vocab_size, embedding_size,
               alpha=0.5, num_negative_samples=15):
    model = model_sswe_u(window_size, vocab_size, embedding_size)
    sswe_u_loss = custom_loss(alpha=alpha)
    model.compile(optimizer=Adagrad(lr=0.01), loss=sswe_u_loss, metrics=['accuracy'])
    print(model.summary())

    inputs, labels = load_dataset(window_size, training_file, vocab_size,
                                  num_negative_samples=num_negative_samples)
    print(labels.shape)

    model.fit([inputs[:, 0, :], inputs[:, 1, :]], labels,
              epochs=2, batch_size=10000, shuffle=True)

    weights = model.get_layer('embedding').get_weights()[0]
    np.save('word_embedding.npy', weights)
    return weights
def save_train_dataset_as_nifti(results_dir=os.path.join(paths.results_folder, "final"),
                                out_dir=os.path.join(paths.results_folder, "training_set_results")):
    if not os.path.isdir(out_dir):
        os.mkdir(out_dir)
    a = load_dataset()
    for fold in range(5):
        working_dir = os.path.join(results_dir, "fold%d" % fold, "validation")
        ids_in_fold = os.listdir(working_dir)
        ids_in_fold.sort()
        ids_in_fold = [i for i in ids_in_fold
                       if os.path.isdir(os.path.join(working_dir, i))]
        ids_in_fold_as_int = [int(i) for i in ids_in_fold]
        for pat_id in ids_in_fold_as_int:
            pat_in_dataset = a[pat_id]
            seg_pred = np.load(os.path.join(working_dir, "%03.0d" % pat_id, "segs.npz"))['seg_pred']
            b = convert_to_original_coord_system(seg_pred, pat_in_dataset)
            sitk_img = sitk.GetImageFromArray(b)
            sitk_img.SetSpacing(pat_in_dataset['spacing'])
            sitk_img.SetDirection(pat_in_dataset['direction'])
            sitk_img.SetOrigin(pat_in_dataset['origin'])
            sitk.WriteImage(sitk_img,
                            os.path.join(out_dir, pat_in_dataset['name'] + ".nii.gz"))
def save_test_set_as_nifti(results_dir=os.path.join(paths.results_folder, "final"),
                           out_dir=os.path.join(paths.results_folder, "test_set_results")):
    if not os.path.isdir(out_dir):
        os.mkdir(out_dir)
    a = load_dataset(folder=paths.preprocessed_testing_data_folder)
    for pat in a.keys():
        probs = []
        for fold in range(5):
            working_dir = os.path.join(results_dir, "fold%d" % fold, "pred_test_set")
            res = np.load(os.path.join(working_dir, "%03.0d" % pat, "segs.npz"))
            probs.append(res['softmax_ouput'][None])
        prediction = np.vstack(probs).mean(0).argmax(0)
        prediction_new = convert_to_brats_seg(prediction)
        np.savez_compressed(os.path.join(out_dir, "%03.0d.npz" % pat), seg=prediction)
        b = convert_to_original_coord_system(prediction_new, a[pat])
        sitk_img = sitk.GetImageFromArray(b)
        sitk_img.SetSpacing(a[pat]['spacing'])
        sitk_img.SetDirection(a[pat]['direction'])
        sitk_img.SetOrigin(a[pat]['origin'])
        sitk.WriteImage(sitk_img, os.path.join(out_dir, a[pat]['name'] + ".nii.gz"))
def import_ds(ds, type=None, parent_id=None):
    if type == 'parent':
        ds['is_parent'] = 'true'
    else:
        ds.pop('is_parent', None)
    if type == 'child':
        ds['parent_dataset'] = parent_id
    else:
        ds.pop('parent_dataset', None)

    dataset_dummy = create_dummy_dataset()
    dataset_dummy['title'] = ds['title']
    dataset_dummy['owner_org'] = owner_org

    # first run to get name created.
    ds_created = dataset_dummy.create(create_url, api_key)

    # then update the dataset with all info
    dataset_full = load_dataset(ds_created)
    map_dataset(dataset_full, ds)
    dataset_full._update(update_url, api_key)

    # add resource
    resources = ds.get('distribution', [])
    for res in resources:
        resource = Resource()
        map_resource(resource, res, dataset_full['id'])
        # skip and report empty resource
        if resource['url']:
            res_created = resource.create(resource_url, api_key)
        else:
            logging.info(' Empty resource skipped for: %s' % ds['title'])

    return dataset_full
def debug(folders, n_components, r=None, max_dimension=1):
    X, y = load_dataset(folders)
    p = Preprocess(n_components)
    X = p.fit_transform(X)
    if r is None:
        distances = PairwiseDistances(X.tolist())
        distances = ExplicitDistances(distances)
        n_samples = len(X)
        r_candidates = sorted(set(np.array(distances.distances).flatten()))
        for r2 in r_candidates:
            print r2
            cx = vietoris_rips(X.tolist(), max_dimension, r2)
            cords = mds_plot(X, y)
            lines_plot(cx, cords)
            plt.show()
    else:
        cx = vietoris_rips(X.tolist(), max_dimension, r)
        actual_max_dimension = len(max(cx, key=len)) - 1
        for d in range(actual_max_dimension, 2, -1):
            sx_d = filter_simplices(cx, d)
            print "dimension", d, ":", len(sx_d), "simplices"
            for i, sx in enumerate(sx_d):
                print i, "..."
                cords = mds_plot(X, y)
                edges = list(combinations(sx, 2))
                lines_plot(edges, cords, color=np.random.rand(3,))
                plt.show()
def main(argv, input_break, target_break, apply_target, S, max_epoch, lr, B=1):
    if (len(argv) != 3):
        print('Error en la entrada')
    [_, model, data] = argv

    # read and pre-process the data
    (input, target) = load_dataset(data, input_break, target_break, apply_target)

    if (not path.exists(model + '.p')):
        # training
        errors, W = train(input, target, S, max_epoch, lr, B)
        plot_error(errors)
        pickle.dump(errors, open(model + '_errors.p', 'wb'))
        pickle.dump(W, open(model + '.p', 'wb'))
    else:
        # testing
        # load the trained model
        W = pickle.load(open(model + '.p', 'rb'))
        # run the test
        r, Y, Z = test(input, target, S, W)
        if Z.shape[1] == 1:
            print('precisión: {} (aciertos/total)'.format(r))
        else:
            print('error cuadratico medio: {}'.format(r))
def convert_quant(model):
    """
    Convert a Keras model to a quantized tflite model.

    Reference: https://www.tensorflow.org/lite/performance/post_training_quantization
    """
    (x_train, _), (_, _), (_, _) = dataset.load_dataset(config.DATASET_PATH)
    x_train = x_train.astype('float32')

    # Calibration for 1 epoch
    def representative_dataset_gen():
        for i in range(len(x_train)):
            # Get sample input data as a numpy array in a method of your choosing.
            # Format NHW
            yield [x_train[i][tf.newaxis, ..., tf.newaxis]]

    model_path = "trained_models/" + config.NETWORK + ".h5"
    converter = tf.compat.v1.lite.TFLiteConverter.from_keras_model_file(model_path)
    converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
    converter.inference_input_type = tf.int8
    converter.inference_output_type = tf.int8
    converter.representative_dataset = representative_dataset_gen
    tflite_quant_model = converter.convert()
    open("trained_models/" + config.NETWORK + "_quant.tflite", "wb").write(tflite_quant_model)
def test(model):
    train_iter, test_iter = load_dataset('spam', batchsize)
    print('Starting testing')
    truth_res = []
    pred_res = []
    avg_loss = 0.0
    avg_acc = 0.0
    i = 0
    for batch in test_iter:
        sentence = batch.sentence
        label = batch.label
        print("Batch :%d" % (i))
        i = i + 1
        truth_res += list(label.data)
        model.zero_grad()
        model.batch_size = np.shape(sentence)[1]
        model.hidden = model.init_hidden()
        tag_scores = model(sentence)
        pred_label = tag_scores.cpu().data.max(1)[1].numpy()
        pred_res += [x for x in pred_label]
        acc = get_accuracy(truth_res, pred_res)
        avg_acc += acc
    t = avg_acc / i
    print('Test Classification accuracy')
    print(t)
def generate_features_main():
    print "\nLoading dataset"
    X_train, X_test, y_train, y_test = dataset.load_dataset()

    print "Creating features of train set ({} images)".format(len(X_train))
    features_train = np.array([get_features(x) for x in tqdm(X_train)])
    print "Creating features of test set ({} images)".format(len(X_test))
    features_test = np.array([get_features(x) for x in tqdm(X_test)])

    print "Flattening individual annotated images"
    y_train = flatten_images(np.array(y_train))
    y_test = flatten_images(np.array(y_test))

    print "Flattening all pixels"
    features_train = np.concatenate(features_train)
    features_test = np.concatenate(features_test)
    y_train = y_train.flatten()
    y_test = y_test.flatten()

    print "Features train {0}, Features test {1}".format(features_train.shape, features_test.shape)
    print "Labels train {0}, Labels test {1}".format(y_train.shape, y_test.shape)

    print "Writing X to file"
    write_features((features_train, features_test))
    print "Writing Y to file"
    write_y((y_train, y_test))
    print "Done."
def go_(self, **kw):
    d = load_dataset(kw.get('dims', 1), kw.get('n', 10), spline=kw.get('spline', 5))
    crit = kw.get('criterion', torch.nn.MSELoss)
    optim = kw.get('optimizer', torch.optim.Adam)
    optim_args = kw.get('optim_args', {
        "lr": kw.get('optim_lr', 1e-3),
        "weight_decay": kw.get('optim_weight_decay', 1e-5),
    })
    model_args = kw.get('model_args', (kw.get('dims', 1), kw.get('model_Nodes', 10),
                                       kw.get('model_dimOut', 1)))
    epochs = kw.get('epochs', 1)

    def mb():
        m = model(*model_args, crit(), optim, optim_args, epochs)
        return m

    # run algo
    algo = algorithm(d, mb)
    print(algo.data.path)
    res = algo.run()

    # save algo
    s = saver()
    return s.save(kw.get('name', self.name + '/' + str(kw.get('pset_i', 0))),
                  algo, model_args, crit, optim, optim_args, epochs)
def run(fold=0):
    print fold
    I_AM_FOLD = fold
    all_data = load_dataset(folder=paths.preprocessed_validation_data_folder)
    use_patients = all_data
    experiment_name = "final"
    results_folder = os.path.join(paths.results_folder, experiment_name,
                                  "fold%d" % I_AM_FOLD)
    write_images = False
    save_npy = True
    INPUT_PATCH_SIZE = (None, None, None)
    BATCH_SIZE = 2
    n_repeats = 2
    num_classes = 4

    x_sym = T.tensor5()
    net, seg_layer = build_net(x_sym, INPUT_PATCH_SIZE, num_classes, 4, 16,
                               batch_size=BATCH_SIZE, do_instance_norm=True)
    output_layer = seg_layer

    results_out_folder = os.path.join(results_folder, "pred_val_set")
    if not os.path.isdir(results_out_folder):
        os.mkdir(results_out_folder)

    with open(os.path.join(results_folder, "%s_Params.pkl" % (experiment_name)), 'r') as f:
        params = cPickle.load(f)
        lasagne.layers.set_all_param_values(output_layer, params)

    print "compiling theano functions"
    output = softmax_helper(lasagne.layers.get_output(output_layer, x_sym,
                                                      deterministic=False,
                                                      batch_norm_update_averages=False,
                                                      batch_norm_use_averages=False))
    pred_fn = theano.function([x_sym], output)
    _ = pred_fn(np.random.random((BATCH_SIZE, 4, 176, 192, 176)).astype(np.float32))

    run_validation_mirroring(pred_fn, results_out_folder, use_patients,
                             write_images=write_images, hasBrainMask=False,
                             BATCH_SIZE=BATCH_SIZE, num_repeats=n_repeats,
                             preprocess_fn=preprocess, save_npy=save_npy,
                             save_proba=False)
def main(num_seeds, verbose):
    # Load the dataset
    dataset = load_dataset(
        glob('../data/trump_tweet_data_archive/condensed_*.json.zip'), verbose)

    # We don't need to vectorize the data, but we do need to chunk it into redundant sequences
    corpus, _, _, c2i, i2c, nc = seq_data(dataset, SEQ_LEN, SEQ_STEP, verbose)

    model = load_model(BASENAME, verbose)
    generate(BASENAME, model, corpus, c2i, i2c, nc, num_seeds, verbose)
def load_chunked_dataset(time_window=1, freq=256):
    """
    Load the dataset as load_dataset does, then chunk it and return it.
    """
    X, y = load_dataset()
    features, target = chunking(X, y, time_window, freq)
    return features, target
def _iterate_reals(self, minibatch_size):
    dataset_obj = dataset.load_dataset(data_dir=config.data_dir, **self._dataset_args)
    while True:
        images, _labels = dataset_obj.get_minibatch_np(minibatch_size)
        if self._mirror_augment:
            images = misc.apply_mirror_augment(images)
        yield images
def load_chunked_datasetFFT():
    """
    Load the dataset as load_dataset does, then chunk it and return it.
    """
    X, y = load_dataset()
    features, target = chunking_FFT(X, y)
    return features, target
def main(config):
    if config.task == 'train':
        config.train = 1
    else:
        config.train = 0

    if config.dataset == 'life':
        config.task = 'regression'
        config.experiment = 'train-test'
    else:
        config.task = 'classification'
        config.experiment = 'doublecv'

    config.expt_name = "Exp" + str(config.experiment) + "_" + config.mod_split + \
        "_" + config.build_model + "_" + config.last_layer

    # Create save directories
    utils.create_directories(config)

    data = load_dataset(config)

    if config.experiment == 'mar_doublecv' or config.experiment == 'doublecv':
        n_feature_sets = len(data.keys()) - 1
    elif config.dataset == 'life':
        n_feature_sets = int(len(data.keys()) / 2) - 1

    X = [np.array(data['{}'.format(i)]) for i in range(n_feature_sets)]
    y = np.array(data['y'])

    X_test = None
    y_test = None

    if config.task == 'classification':
        config.n_classes = len(set(y))

    if config.dataset == 'life':
        X_test = [np.array(data['{}_test'.format(i)]) for i in range(n_feature_sets)]
        y_test = np.array(data['y_test'])

    config.n_feature_sets = n_feature_sets
    config.feature_split_lengths = [i.shape[1] for i in X]

    if config.verbose > 0:
        print('Dataset used ', config.dataset)
        print('Number of feature sets ', n_feature_sets)
        [print('Shape of feature set {} {}'.format(e, np.array(i).shape))
         for e, i in enumerate(X)]

    trainer.train(X, y, config, X_test, y_test)
    print(config.expt_name)
    print(config.dataset)
def train():
    tf.random.set_seed(22)
    np.random.seed(22)

    data_iter = dataset.load_dataset()

    # the multi-input models are built from a list of input shapes
    generator = Generator()
    generator.build(input_shape=[(None, z_dim), (None, 10)])
    discriminator = Discriminator()
    discriminator.build(input_shape=[(None, 28, 28, 1), (None, 10)])

    g_optimizer = tf.optimizers.Adam(learning_rate=learning_rate, beta_1=0.5)
    d_optimizer = tf.optimizers.Adam(learning_rate=learning_rate, beta_1=0.5)

    for epoch in range(epochs):
        for i in range(int(60000 / batch_size / epochs_d)):
            batch_z = tf.random.uniform([batch_size, z_dim], minval=0., maxval=1.)
            batch_c = []
            for k in range(batch_size):
                batch_c.append(np.random.randint(0, 10))
            batch_c = tf.one_hot(tf.convert_to_tensor(batch_c), 10)

            # train D
            for epoch_d in range(epochs_d):
                batch_data = next(data_iter)
                batch_x = batch_data[0]
                batch_y = batch_data[1]
                with tf.GradientTape() as tape:
                    d_loss = d_loss_fn(generator, discriminator, batch_z, batch_c,
                                       batch_x, batch_y, is_training)
                grads = tape.gradient(d_loss, discriminator.trainable_variables)
                d_optimizer.apply_gradients(zip(grads, discriminator.trainable_variables))

            # train G
            with tf.GradientTape() as tape:
                g_loss = g_loss_fn(generator, discriminator, batch_z, batch_c, is_training)
            grads = tape.gradient(g_loss, generator.trainable_variables)
            g_optimizer.apply_gradients(zip(grads, generator.trainable_variables))

        print('epoch : {epoch} d-loss : {d_loss} g-loss : {g_loss}'.format(
            epoch=epoch, d_loss=d_loss, g_loss=g_loss))

        z = tf.random.uniform([100, z_dim], minval=0., maxval=1.)
        c = []
        for i in range(10):
            for j in range(10):
                c.append(i)
        c = tf.one_hot(tf.convert_to_tensor(c), 10)
        fake_image = generator([z, c], training=False)
        img_path = os.path.join('images', 'infogan-%d-final.png' % epoch)
        saver.save_image(fake_image.numpy(), img_path, 10)
def LDA_process(dataset):
    fea, link, label = load_dataset(dataset)
    corpus = matutils.Dense2Corpus(fea, documents_columns=False)
    num_topics = 100
    print 'performing lda...'
    model = models.LdaModel(corpus, num_topics=num_topics, passes=10)
    topic_fea = matutils.corpus2dense(model[corpus], num_topics)
    topic_fea = topic_fea.transpose()
    np.save('dataset/' + dataset + '/lda_fea', topic_fea)
def visualize(name):
    fea, link, label = dataset.load_dataset(name)
    label = np.argmax(label, axis=1)
    label = label.astype("float")
    label = label + 1
    label = label / max(label)
    link = link.tocsc()
    g = nx.Graph(link)
    nx.draw_networkx(g, node_size=100, with_labels=False, node_color=label)
    plt.show()
def main(product):
    FILE = "../data/ABSA-15_{}_Train_Data.xml".format(product)
    reviews = load_dataset(FILE)
    FILE = "../data/ABSA15_{}_Test.xml".format(product)
    reviews += load_dataset(FILE)

    entities = set()
    attributes = set()
    for rv in reviews:
        for stc in rv.sentences:
            for opi in stc.opinions:
                cate = opi.category
                entity, attribute = cate.split('#')
                entities.add(entity)
                attributes.add(attribute)

    list2file(entities, "../data/{}.entity".format(product))
    list2file(attributes, "../data/{}.attribute".format(product))
def community_label_entropy(name):
    fea, link, label = dataset.load_dataset(name)
    c_fea = get_c_fea(name)
    cl = c_fea.transpose().dot(label)
    l = cl.shape[0]
    entropy = []
    for i in range(l):
        x = cl[i, :]
        entropy.append(stats.entropy(x[x.nonzero()]))
    return np.mean(entropy)
def get_c_fea(name):
    fea, link, label = dataset.load_dataset(name)
    num_inst = link.shape[0]
    g = nx.Graph(link)
    partition = community.best_partition(g)
    communities = partition.values()
    loc_fea = np.zeros((num_inst, max(communities) + 1))
    for i, v in enumerate(communities):
        loc_fea[i, v] = 1
    return loc_fea
def main():
    optparser = OptionParser()
    optparser.add_option("--train", dest="train_file", help="training file name")
    optparser.add_option("--test", dest="test_file", help="testing file")
    optparser.add_option('--pro', dest='product')
    (options, args) = optparser.parse_args()

    save_as_mimlmix_format.PATH = "./{}/data/".format(options.product)

    train_reviews = load_dataset(options.train_file)
    test_reviews = load_dataset(options.test_file)

    n_cates, cate_index = get_categories(train_reviews + test_reviews)
    vocab_size = 1000
    vocab_index = get_vocab(train_reviews, vocab_size)

    train_bags = [extract_unigram(vocab_index, vocab_size, review)
                  for review in train_reviews]
    train_labels = [extract_labels(cate_index, review)
                    for review in train_reviews]
    test_bags = [extract_unigram(vocab_index, vocab_size, review)
                 for review in test_reviews]
    test_labels = [extract_labels(cate_index, review)
                   for review in test_reviews]

    save_label_id(cate_index)
    save_view_info(view_name="ngram", dim=vocab_size,
                   data_format="sparse", view_type="discrete")

    features = train_bags + test_bags
    save_sparse_feature(corpus_name=options.product, view_name="ngram", features=features)

    labels = train_labels + test_labels
    save_label(options.product, labels)
    save_partition(len(train_labels), len(test_labels))

    # word2vec
    word2vec_feat(train_reviews + test_reviews)
    print("Done")
def evaluate(dataset_name, fl, ratio):
    print dataset_name, fl.__name__, ratio
    d = dataset.load_dataset(dataset_name)
    fea = d.data
    label = d.target
    fea = fl(fea)
    ss = StratifiedShuffleSplit(label, 3, test_size=(1 - ratio), random_state=0)
    svc = LinearSVC()
    for train, test in ss:
        svc.fit(fea[train, :], label[train, :])
        predict = svc.predict(fea[test, :])
        acc = accuracy_score(label[test, :], predict)
        print acc
def evaluate(dataset, model):
    kfold = load_cv(dataset)
    fea, link, label = load_dataset(dataset)
    errors = []
    for train, test in kfold:
        tmp_label = label.copy()
        tmp_label[test, :] = 0
        tmp_label = model.fit_predict(fea, link, train, tmp_label)
        error = np.abs(tmp_label[test, :] - label[test, :]).sum() / 2 / tmp_label.shape[0]
        errors.append(error)
        print error
    print 'mean', np.mean(errors)
    return errors
def load_dataset_for_previous_run(run_id, **kwargs):  # => dataset_obj, mirror_augment
    result_subdir = locate_result_subdir(run_id)

    # Parse config.txt.
    parsed_cfg = dict()
    with open(os.path.join(result_subdir, 'config.txt'), 'rt') as f:
        for line in f:
            if line.startswith('dataset =') or line.startswith('train ='):
                exec(line, parsed_cfg, parsed_cfg)
    dataset_cfg = parsed_cfg.get('dataset', dict())
    train_cfg = parsed_cfg.get('train', dict())
    mirror_augment = train_cfg.get('mirror_augment', False)

    # Handle legacy options.
    if 'h5_path' in dataset_cfg:
        dataset_cfg['tfrecord_dir'] = dataset_cfg.pop('h5_path').replace('.h5', '')
    if 'mirror_augment' in dataset_cfg:
        mirror_augment = dataset_cfg.pop('mirror_augment')
    if 'max_labels' in dataset_cfg:
        v = dataset_cfg.pop('max_labels')
        if v is None: v = 0
        if v == 'all': v = 'full'
        dataset_cfg['max_label_size'] = v
    if 'max_images' in dataset_cfg:
        dataset_cfg.pop('max_images')

    # Handle legacy dataset names.
    v = dataset_cfg['tfrecord_dir']
    v = v.replace('-32x32', '').replace('-32', '')
    v = v.replace('-128x128', '').replace('-128', '')
    v = v.replace('-256x256', '').replace('-256', '')
    v = v.replace('-1024x1024', '').replace('-1024', '')
    v = v.replace('celeba-hq', 'celebahq')
    v = v.replace('cifar-10', 'cifar10')
    v = v.replace('cifar-100', 'cifar100')
    v = v.replace('mnist-rgb', 'mnistrgb')
    v = re.sub('lsun-100k-([^-]*)', 'lsun-\\1-100k', v)
    v = re.sub('lsun-full-([^-]*)', 'lsun-\\1-full', v)
    dataset_cfg['tfrecord_dir'] = v

    # Load dataset.
    dataset_cfg.update(kwargs)
    dataset_obj = dataset.load_dataset(data_dir=config.data_dir, **dataset_cfg)
    return dataset_obj, mirror_augment
def stats(name):
    fea, link, label = dataset.load_dataset(name)
    g = nx.Graph(link)
    components = nx.connected_components(g)
    num_node = link.shape[0]
    num_link = link.sum() / 2
    density = float(2 * num_link) / num_node
    ratio = float(len(components[0])) / link.shape[0]
    row, col = link.nonzero()
    label = np.argmax(label, axis=1)
    homogeneity = float((label[row] == label[col]).sum()) / len(row)
    info = {
        "name": name,
        "ratio": ratio,
        "homogeneity": homogeneity,
        "num_node": num_node,
        "num_link": num_link,
        "density": density,
    }
    print info
def teste_dataset(dataset, titulo):
    dataset = load_dataset(dataset)
    if len(dataset) > 100:
        np.random.shuffle(dataset)
        dataset = dataset[:100]
    pontos = [[float(linha[0]), float(linha[1])] for linha in dataset]
    num_classes, criterio_parada = 4, 0.01
    iteracoes_km = []
    iteracoes_kmpp = []
    for i in range(30):
        clusters, iteracoes = kmeans(pontos, num_classes, criterio_parada)
        iteracoes_km.append(iteracoes)
        clusters, iteracoes = kmeanspp(pontos, num_classes, criterio_parada)
        iteracoes_kmpp.append(iteracoes)
    fig, axes = plt.subplots(nrows=1, ncols=2)
    axes[0].boxplot(iteracoes_km, labels=['K-means'])
    axes[0].set_title(titulo)
    axes[1].boxplot(iteracoes_kmpp, labels=['K-means++'])
    axes[1].set_title(titulo)
    plt.show()
from textgen import generate_c2w2c_text, generate_word_lstm_text
from util import info, Timer

sys.setrecursionlimit(40000)

MIN_LR = 0.0001
MAX_PATIENCE = 1

params = model_params.from_cli_args()
params.print_params()
print ''

use_unk = params.mode == 'WORD'

print 'Loading training data...'
training_dataset = load_dataset(params.training_dataset, params.train_data_limit, use_unk)
training_dataset.print_stats()
print ''

print 'Loading test data...'
test_dataset = load_dataset(params.test_dataset, params.test_data_limit, use_unk)
test_dataset.print_stats()
print ''

# Vocabularies
V_C = make_char_vocabulary([training_dataset])
V_W = training_dataset.vocabulary

print 'V_C statistics:'
print '  - Distinct characters: %d' % V_C.size
print ''
def dump(dataset):
    print("dumping", dataset)
    with open(dataset + ".pickle", "wb") as f:
        pickle.dump(load_dataset(dataset), f)
def prepare_cv(dataset, train_ratio=0.1):
    fea, link, label = load_dataset(dataset)
    cv = ShuffleSplit(fea.shape[0], 10, test_size=1 - train_ratio,
                      indices=False, random_state=0)
    pickle.dump(cv, open('benchmark/cv/' + dataset, 'wb'))
# -*- coding: utf-8 -*-
import feature_extraction
import dataset
import pickle
import numpy as np
from searcher import Searcher

if __name__ == '__main__':
    # TODO: load experiment using params
    train_path = ""
    test_path = ""

    # loads images from given paths
    train = dataset.load_dataset(train_path)
    test = dataset.load_dataset(test_path)

    # extracts descriptors for train and test sets
    train_descriptors = {item.path: feature_extraction.extract_descriptors(item.data) for item in train}
    test_descriptors = {item.path: feature_extraction.extract_descriptors(item.data) for item in test}

    # creates codebook (default size=300) based on train samples
    codebook = feature_extraction.create_codebook(np.concatenate(train_descriptors.values()))

    # generate feature vectors for train and test based on previously calculated codebook
    train_features = {key: feature_extraction.extract_features(codebook, train_descriptors[key]) for key in train_descriptors}
    test_features = {key: feature_extraction.extract_features(codebook, test_descriptors[key]) for key in test_descriptors}

    # TODO: create a similarity matrix using all features

    # persists features, codebook and similarity matrix
    pickle.dump(train_features, open("train_features.pk", "wb"))
    pickle.dump(test_features, open("test_features.pk", "wb"))
""" Override fit_transform to avoid calling transform twice on the standard scaler (in fit and transform) """ return self.pca.fit_transform(self.scaler.fit_transform(x)) def inverse_transform(self, x, only_pca=False): """ First undo the PCA transformation, then undo the scaling unless only_pca """ if only_pca: return self.pca.inverse_transform(x) return self.scaler.inverse_transform(self.pca.inverse_transform(x)) if __name__ == '__main__': from dataset import load_dataset x,y = load_dataset(['../data/tea_cup', '../data/spoon']) # Test fit -> transform vs fit_transform p = Preprocess(0.7) p.fit(x) x2 = p.transform(x) x3 = p.fit_transform(x) assert np.all(np.isclose(x2, x3)) # Test inverse transform. # With all PCA components retained, the inverse should be equal original. p2 = Preprocess() x4 = p2.fit_transform(x) x5 = p2.inverse_transform(x4) assert np.all(np.isclose(x5, x))
def sent_iter(reviews, linesentence):
    for review in reviews:
        for sent in review.sentences:
            sent_str = ' '.join(sent.words)
            yield sent_str
    for sent in linesentence:
        yield sent


train_file = "../../data/ABSA-15_Laptops_Train_Data.xml"
test_file = "../../data/ABSA15_Laptops_Test.xml"

logging.basicConfig(format='%(asctime)s : %(threadName)s : %(levelname)s : %(message)s',
                    level=logging.INFO)
logging.info("running %s" % " ".join(sys.argv))

train_reviews = load_dataset(train_file)
#train_sents = sent_iter(train_reviews)
test_reviews = load_dataset(test_file)
#test_sents = sent_iter(test_reviews)
unlabeled_sents = LineSentence("../../data/laptop.unlabeled.txt")

model_1 = Sent2Vec(sent_iter(train_reviews, unlabeled_sents),
                   model_file="../../models/laptop.word2vec.model")
model_1.save_sent2vec_format("../../models/laptop.sent2vec.model")

model_2 = Sent2Vec(sent_iter(train_reviews + test_reviews, unlabeled_sents),
                   model_file="../../models/laptop.word2vec.model")
model_2.save_sent2vec_format("../../models/laptop_with_test.sentenc2vec.model")
def main(num_epochs=100):
    print("Loading data...")
    X_train, y_train, X_val, y_val, X_test, y_test = load_dataset()

    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')

    print("Building model and compiling functions...")
    network = build_cnn(input_var)

    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()

    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(
        loss, params, learning_rate=0.01, momentum=0.9)

    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction, target_var)
    test_loss = test_loss.mean()
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    train_fn = theano.function([input_var, target_var], loss, updates=updates)
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc])

    print("Starting training...")
    for epoch in range(num_epochs):
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for batch in iterate_minibatches(X_train, y_train, 500, shuffle=True):
            inputs, targets = batch
            train_err += train_fn(inputs, targets)
            train_batches += 1

        val_err = 0
        val_acc = 0
        val_batches = 0
        for batch in iterate_minibatches(X_val, y_val, 500, shuffle=False):
            inputs, targets = batch
            err, acc = val_fn(inputs, targets)
            val_err += err
            val_acc += acc
            val_batches += 1

        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time))
        print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
        print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
        print("  validation accuracy:\t\t{:.2f} %".format(
            val_acc / val_batches * 100))

    test_err = 0
    test_acc = 0
    test_batches = 0
    for batch in iterate_minibatches(X_test, y_test, 500, shuffle=False):
        inputs, targets = batch
        err, acc = val_fn(inputs, targets)
        test_err += err
        test_acc += acc
        test_batches += 1
    print("Final results:")
    print("  test loss:\t\t\t{:.6f}".format(test_err / test_batches))
    print("  test accuracy:\t\t{:.2f} %".format(
        test_acc / test_batches * 100))

    # Dump the network weights to a file like this:
    np.savez(os.path.join(checkpoint_path, 'model.npz'),
             *lasagne.layers.get_all_param_values(network))
def main(product):
    TRAIN_FILE = "../data/ABSA-15_{}_Train_Data.xml".format(product)
    TEST_FILE = "../data/ABSA15_{}_Test.xml".format(product)

    # load data set
    training_reviews = load_dataset(TRAIN_FILE)
    testing_reviews = load_dataset(TEST_FILE)

    # build vocab
    vocab = build_vocab(training_reviews, TOPN=1000)
    vocab_index = list2dict(vocab)

    cate_index = get_all_categories(training_reviews)
    cates = dict2list(cate_index)
    n_cates = len(cates)

    print "Loading alignment model"
    align_model = load_align_model("s2t64.actual.ti.final")

    print "Get prior"
    prior = get_prior(training_reviews)

    print "Training level 2 model..."
    lev2_model = train_pola_clf(training_reviews, vocab_index, cate_index)

    print "Predicting..."
    results = []
    for review in testing_reviews:
        for sent in review.sentences:
            pairs_predict = predict(sent, align_model, prior, lev2_model,
                                    vocab_index, cate_index)
            results.append(pairs_predict)

    print "Evaluation"
    opinions = []
    for review in testing_reviews:
        for sent in review.sentences:
            #opis = [(cate_index[opi.category], opi.polarity) for opi in sent.opinions]
            opis = []
            for opi in sent.opinions:
                if opi.category in cate_index:
                    opis.append((cate_index[opi.category], opi.polarity))
            opinions.append(opis)

    # first evaluation: categories only (pair[0])
    TP1 = 0.0
    FP1 = 0.0
    FN1 = 0.0
    for i in range(len(opinions)):
        o = set([pair[0] for pair in results[i]])
        g = set([pair[0] for pair in opinions[i]])
        TP1 += len(o & g)
        FP1 += len(o - g)
        FN1 += len(g - o)
    p = TP1 / (TP1 + FP1)
    r = TP1 / (TP1 + FN1)
    if p + r == 0:
        f = 0
    else:
        f = 2. * p * r / (p + r)
    print p, r, f

    # second evaluation: full (category, polarity) pairs
    TP2 = 0.0
    FP2 = 0.0
    FN2 = 0.0
    for i in range(len(opinions)):
        o = set(results[i])
        g = set(opinions[i])
        TP2 += len(o & g)
        FP2 += len(o - g)
        FN2 += len(g - o)
    p = TP2 / (TP2 + FP2)
    r = TP2 / (TP2 + FN2)
    if p + r == 0:
        f = 0
    else:
        f = 2. * p * r / (p + r)
    print p, r, f
parser.add_argument('--datadir', type=str, default='data')
args = parser.parse_args()
if args.gpu >= 0:
    cuda.check_cuda_available()
xp = cuda.cupy if args.gpu >= 0 else np

batchsize = 100
n_epoch = args.epoch
n_units = 1000

# create result dir
log_fn, result_dir = create_result_dir(args)

# Prepare dataset
print('load CIFAR10 dataset')
dataset = load_dataset(args.datadir)
x_train, y_train, x_test, y_test = dataset
x_train = x_train.astype(np.float32) / 255.0
y_train = y_train.astype(np.int32)
x_test = x_test.astype(np.float32) / 255.0
y_test = y_test.astype(np.int32)
N = x_train.shape[0]
N_test = x_test.shape[0]

models = []
model = VGG_mini()
if args.gpu >= 0:
    cuda.get_device(args.gpu).use()
    model.to_gpu()
            'k': k,
            'momentum': momentum,
            'mae': mae,
            'rmse': rmse,
            'lrate': current_l_w
        }
        config_result['results'].append(iteration_result)
        print(iteration_str.format(j, k, current_l_w, momentum, mae, rmse))

    with open('{}_{}.json'.format(config_name, name), 'wt') as res_output:
        res_output.write(json.dumps(config_result, indent=4))


if __name__ == "__main__":
    experiments = read_experiment(sys.argv[1])

    for experiment in experiments:
        name = experiment['name']
        train_path = experiment['train_path']
        test_path = experiment['test_path']
        sep = experiment['sep']
        configs = experiment['configs']

        all_users, all_movies, tests = load_dataset(train_path, test_path, sep,
                                                    user_based=True)

        for config in configs:
            run(name, train_path, config, all_users, all_movies, tests, None, sep)
import numpy as np
import sys
import math
from scipy.misc import imsave, imread
from scipy.sparse.linalg import lsqr
import cv2
import time

from util import pyrup, save_mesh, form_poisson_equation, pyrdown
from dataset import load_dataset

assert len(sys.argv) > 2
data = load_dataset(sys.argv[1])
mode = sys.argv[2]
assert mode in ('normals', 'depth', 'both')

alpha = data.right_alpha
depth_weight = None
depth = None
K_right = None
normals = None
albedo = None

tic = time.time()
if mode in ('normals', 'both'):
    albedo = imread(data.albedo_png)
    normals = np.load(data.normals_npy)
if mode in ('depth', 'both'):