def __init__(self, data, labeled_percent, num_classes, feature_shape,
             label_shape, embeddings=None, use_embedding=False):
    assert isinstance(data, np.ndarray)
    self._data = data
    # Split into labeled / unlabeled subsets (no validation split here).
    unlabeled_indices, labeled_indices, _ = data_utils.split_dataset(
        data, labeled_percent, 0, num_classes=num_classes, ret_indices=True)
    self._data_l = data[labeled_indices]
    self._data_u = data[unlabeled_indices]
    self._num_classes = num_classes
    self._feature_shape = feature_shape
    self._label_shape = label_shape
    self._embeddings = np.array(embeddings) if embeddings is not None else None
    if self._embeddings is not None:
        # Precomputed embeddings were supplied; index them directly.
        self._embedded_data = self._embeddings
        self._embedded_data_u = self._embeddings[unlabeled_indices]
        self._embedded_data_l = self._embeddings[labeled_indices]
    elif use_embedding:
        print("Start embedding.")
        _start = time.time()
        _tmp_data = np.array(data['feature'].tolist()).reshape(feature_shape)
        # Collapse the channel axis of image-like features before embedding.
        if np.ndim(_tmp_data) == 4:
            _tmp_data = np.sum(_tmp_data, axis=-1)
        # Flatten every sample into a single feature vector.
        _tmp_data = _tmp_data.reshape(_tmp_data.shape[0], -1)
        embedding_func = manifold.SpectralEmbedding(n_components=128)
        self._embedded_data = embedding_func.fit_transform(_tmp_data)
        self._embedded_data_l = self._embedded_data[labeled_indices]
        self._embedded_data_u = self._embedded_data[unlabeled_indices]
        print('Embedding finished, time %.2fs' % (time.time() - _start))
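# A minimal usage sketch, assuming the enclosing class is named `SSLDataset`
# (hypothetical) and that `data` is a NumPy structured array with 'feature'
# and 'label' fields, matching how the constructor indexes it:
import numpy as np

# Hypothetical: a tiny structured array of (feature, label) pairs.
_dtype = [('feature', np.float32, (60, 41, 2)), ('label', np.int64)]
_pairs = np.zeros(100, dtype=_dtype)
_pairs['label'] = np.random.randint(0, 10, size=100)

dataset = SSLDataset(_pairs,                      # hypothetical class name
                     labeled_percent=0.1,
                     num_classes=10,
                     feature_shape=[-1, 60, 41, 2],
                     label_shape=[10])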
def retrieve(image_dir, gallery_dir, model_path, gallery_encode, feature_code):
    """
    :param image_dir: root image directory with two subfolders, `query` and
        `gallery`, each holding images
    :param gallery_dir: build_gallery saves its data into this directory;
        single_query reads data back from it
    :param model_path: path of the model checkpoint to load
    :param gallery_encode: if True, encode gallery features; otherwise load
        the previously saved files
    :param feature_code: 1-scda, 2-scda_flip, 3-scda_plus, 4-scda_flip_plus;
        the input batch and output layer also differ per code
    :return:
    """
    # Check directories.
    assert os.path.isdir(gallery_dir), 'no directory named {}'.format(
        gallery_dir)  # directory where the gallery is saved
    assert os.path.isdir(image_dir), 'no directory named {}'.format(
        image_dir)  # dataset directory
    assert os.path.isfile(model_path), 'model path not given!'
    # Build the model.
    input_shape = (None, None, None, 3)
    images = tf.placeholder(shape=input_shape, dtype=tf.float32)
    final_output, feature_dict = vgg.vgg_16(inputs=images,
                                            num_classes=None,
                                            is_training=False)
    feature_1 = feature_dict['vgg_16/pool5']
    feature_2 = feature_dict['vgg_16/conv5/conv5_2']
    # The final output node depends on the feature code.
    if feature_code in (1, 2):
        feature = feature_1
    else:
        feature = [feature_1, feature_2]
    # Restore: filter out variables that need not be loaded. Passing a dict
    # maps saved variables onto new model variables; passing a list loads
    # them directly.
    include_vars_map = None
    saver = tf.train.Saver(include_vars_map)
    # Define the session.
    with tf.Session() as sess:
        # Load parameters.
        sess.run(tf.global_variables_initializer())
        print(model_path)
        saver.restore(sess, model_path)
        # Dataset.
        query_im_paths, query_labels, gallery_im_paths, gallery_labels = \
            data_utils.split_dataset(image_dir)
        # Extract or load gallery features.
        if gallery_encode:
            gallery_features = build_gallery(sess, images, feature,
                                             feature_code, gallery_im_paths,
                                             gallery_dir)
        else:
            gallery_features = np.load(
                os.path.join(gallery_dir, 'gallery_features.npy'))
        # Run retrieval.
        query(sess, images, feature, feature_code, query_im_paths,
              gallery_features, query_labels, gallery_labels)
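# A hedged invocation sketch; all paths below are placeholders, and
# feature_code=2 selects SCDA with horizontal flips per the docstring:
retrieve(image_dir='data/cub200',        # hypothetical root with query/ and gallery/
         gallery_dir='output/gallery',   # hypothetical output directory (must exist)
         model_path='models/vgg_16.ckpt',
         gallery_encode=True,            # encode gallery features on this run
         feature_code=2)                 # 2 = scda_flip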
def main():
    parser = argparse.ArgumentParser(description='Logistic Regression test')
    parser.add_argument('-n', '--n_iter', type=int, default=50,
                        help='number of iterations for grad_descent')
    parser.add_argument('-f', '--n_features', type=int, default=2,
                        help='number of features')
    args = parser.parse_args()
    n_iter = args.n_iter
    n_features = args.n_features

    X, y, centers = generate_classification_data(n_features=n_features)
    X_train, X_test, y_train, y_test = split_dataset(X, y)
    print("Training size: %s, Test size: %s" % (len(X_train), len(X_test)))
    print("-" * 20)

    # Plot the dataset.
    plot_points_and_cluster(X, centers)

    # Fit and predict.
    model = LogisticRegression(n_iter=n_iter)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print("-" * 20)

    # Scoring.
    model.score(y_test, y_pred)
    print("-" * 20)

    # Plot the decision boundary (only drawable in 2-D).
    if n_features == 2:
        plot_logistic_regression_decision_boundary(X, y, model)

    # Plot iteration vs. cost.
    plot_iteration_vs_cost(n_iter, model.cost_h)
def main():
    parser = argparse.ArgumentParser(description='Linear Regression test')
    parser.add_argument('-m', '--method', type=str, default='ols',
                        help='model method: ols or grad_descent')
    parser.add_argument('-n', '--n_iter', type=int, default=50,
                        help='number of iterations for grad_descent')
    args = parser.parse_args()
    method = args.method
    n_iter = args.n_iter

    X, y, m, bias = generate_linear_data(n_samples=1000, n_features=10,
                                         bias=10)
    X_train, X_test, y_train, y_test = split_dataset(X, y)
    print("Training size: %s, Test size: %s" % (len(X_train), len(X_test)))
    print("-" * 20)

    # Fit and predict.
    model = LinearRegression(n_iter=n_iter)
    model.fit(X_train, y_train, method)
    y_pred = model.predict(X_test)
    print("-" * 20)

    # Scoring.
    model.score(y_test, y_pred)
    print("-" * 20)
    print("True coefs: ", np.insert(m, 0, bias))
    print("Model coefs:", model.beta_hat)
    print("-" * 20)

    # Plotting.
    plot_regression_residual(y_test, y_pred, bins=int(len(X_train) / 20))
    if method == 'grad_descent':
        plot_iteration_vs_cost(n_iter, model.cost_h)
if __name__ == '__main__':
    np.random.seed(1)
    _input_shape = [60, 41, 2]
    _num_classes = 10
    _num_total = 12500
    _pairs = data_utils.load_data('../data/urbansound8k/pairs')
    _pairs = np.random.choice(_pairs, _num_total, replace=False)
    # Split into train/test (no validation split) and extract features and
    # labels.
    _train_pairs, _, _test_pairs = data_utils.split_dataset(
        pairs=_pairs,
        valid_percent=0,
        test_percent=0.2,
        num_classes=_num_classes)
    _labels = np.array(_pairs['label'].tolist())
    # Inverse-frequency class weights to counter class imbalance.
    _class_weights = [
        _num_total / sum(_labels == i) for i in range(_num_classes)
    ]
    _train_features = np.reshape(_train_pairs['feature'].tolist(),
                                 [-1] + _input_shape)
    _train_labels = to_categorical(_train_pairs['label'].tolist())
    _test_features = np.reshape(_test_pairs['feature'].tolist(),
                                [-1] + _input_shape)
    _test_labels = to_categorical(_test_pairs['label'].tolist())
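# Since `to_categorical` suggests a Keras pipeline, a hedged sketch of how
# `_class_weights` might be consumed; the `model` object is assumed, not
# defined in this snippet:
_class_weight_dict = dict(enumerate(_class_weights))  # Keras wants {index: weight}
model.fit(_train_features, _train_labels,
          epochs=10, batch_size=128,
          class_weight=_class_weight_dict,
          validation_data=(_test_features, _test_labels))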
model_name = args.model
NUM_HIST, NUM_PRED = args.n_hist, args.n_pred
BATCH_SIZE, NUM_EPOCHS, LEARNING_RATE = (args.batch_size, args.num_epochs,
                                         args.lr)

# Read the data and resample it to 4-hour intervals.
raw_data = pd.read_csv('data/mpi_roof.csv',
                       infer_datetime_format=True,
                       parse_dates=['Date Time'])
data = resample_time(raw_data, 'Date Time', '4H')
time_seq = data['T (degC)'].values

# Prepare the data: window, normalise, and train-val-test split.
X, y = sliding_windows(time_seq, window=NUM_HIST, overlap=1,
                       num_pred=NUM_PRED)
data_dict, MIN_VAL, MAX_VAL = normalise(
    split_dataset(X, y, test_size=0.2, seed=22))

# Generate dataloaders for modelling.
train_loader = generate_dataloader(data_dict['train'][0],
                                   data_dict['train'][1],
                                   batch_size=BATCH_SIZE)
val_loader = generate_dataloader(data_dict['val'][0],
                                 data_dict['val'][1],
                                 batch_size=BATCH_SIZE)
test_loader = generate_dataloader(data_dict['test'][0],
                                  data_dict['test'][1],
                                  batch_size=BATCH_SIZE)

# Set up the model.
if model_name == 'ANN':
    model = models.ANN(num_layers=2, num_nodes=[NUM_HIST, 64, 1])
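# A minimal sketch of the windowing this snippet assumes `sliding_windows`
# performs (the real helper may differ): each X row holds `window` past
# values, each y row the following `num_pred` values, stepping by `overlap`.
import numpy as np

def sliding_windows_sketch(seq, window, overlap, num_pred):
    X, y = [], []
    # Slide a fixed-size history window over the series, stepping by `overlap`.
    for start in range(0, len(seq) - window - num_pred + 1, overlap):
        X.append(seq[start:start + window])
        y.append(seq[start + window:start + window + num_pred])
    return np.array(X), np.array(y)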
def load_and_split_data(csvfile, x_npy, y_npy, num_folds, word_cnn=False):
    # Load the full dataset, then split it into `num_folds` folds.
    X, Y = du.load_data(csvfile, x_npy, y_npy, word_cnn)
    folds = du.split_dataset(X, Y, num_folds)
    return X, Y, folds
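# A hedged cross-validation loop over the returned folds, assuming each fold
# is a (train_indices, val_indices) pair; the actual fold layout produced by
# `du.split_dataset` is not shown in this snippet, and the file names below
# are placeholders.
X, Y, folds = load_and_split_data('data/train.csv', 'x.npy', 'y.npy',
                                  num_folds=5)
for i, (train_idx, val_idx) in enumerate(folds):   # assumed fold layout
    X_train, Y_train = X[train_idx], Y[train_idx]
    X_val, Y_val = X[val_idx], Y[val_idx]
    print('Fold %d: %d train / %d val samples' % (i, len(X_train), len(X_val)))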
print('Framework type: `%s`' % flags.framework_type)
print('Framework architecture: `%s`, hist_sel_mode: %s' %
      (framework_creator.__name__, flags.hist_sel_mode))
tf.set_random_seed(flags.random_seed)
np.random.seed(flags.random_seed)

# Load the dataset.
_pairs = data_utils.load_data('data/%s/pairs' % _sub_dir)
_pairs = np.random.choice(_pairs, _num_total, replace=False)

# Split into train/test and extract features and labels.
_train_pairs, _, _test_pairs = data_utils.split_dataset(
    pairs=_pairs,
    valid_percent=flags.valid_percent,
    test_percent=flags.test_percent,
    num_classes=flags.num_classes)
_labels = np.array(_pairs['label'].tolist())
# Inverse-frequency class weights to counter class imbalance.
_class_weights = [
    _num_total / sum(_labels == i) for i in range(_num_classes)
]
_train_features = np.reshape(_train_pairs['feature'].tolist(),
                             [-1] + _input_shape)
_train_labels = to_categorical(_train_pairs['label'].tolist())
_test_features = np.reshape(_test_pairs['feature'].tolist(),
                            [-1] + _input_shape)
_test_labels = to_categorical(_test_pairs['label'].tolist())