def index_images(folder, features_path, mapping_path, model, glove_path):
    """Generate features for the images under ``folder`` and persist them.

    Word vectors are loaded from ``glove_path`` and handed to
    ``utils.load_paired_img_wrd`` together with ``folder``; only the third
    element of its result (the paths) is used.  The paths are run through
    ``utils.generate_features`` with ``model`` and the outcome is written via
    ``utils.save_features``.

    Returns:
        The ``(images_features, file_index)`` pair produced by
        ``utils.generate_features``.
    """
    print("Now indexing images...")
    glove_vectors = utils.load_glove_vectors(glove_path)
    # The first two elements of the loader's result are not needed here.
    _first, _second, image_paths = utils.load_paired_img_wrd(
        folder=folder,
        word_vectors=glove_vectors,
    )
    images_features, file_index = utils.generate_features(image_paths, model)
    utils.save_features(
        features_path,
        images_features,
        mapping_path,
        file_index,
    )
    return images_features, file_index
def _center_columns(snapshot_matrix, row_mean):
    """Subtract ``row_mean`` (one value per row) from every column."""
    return np.asarray(snapshot_matrix) - np.reshape(row_mean, (-1, 1))


def main():
    """Compute and store POD data, features, targets and projection errors.

    For every component in ``config.components`` and every dataset in
    ``config.datasets``:

    * the snapshot matrix is reduced to the component and centered with the
      mean of the *train* snapshots;
    * on the train dataset the POD (``do_POD``) and a ``StandardScaler``
      fitted on the train parameters are computed and saved;
    * features, targets and the squared POD projection error are computed
      and saved through ``utils``.

    Raises:
        ValueError: if the first configured dataset is not ``'train'``; the
            mean, POD basis and scaler used by the other datasets are only
            produced while processing the train dataset.
    """
    # Checked once, up front, with a real exception: an `assert` is stripped
    # under `python -O`, which would surface later as an unbound S_mean.
    if not config.datasets or config.datasets[0] != 'train':
        raise ValueError('The first dataset must be train')

    utils.set_up_data_directories()

    snapshots = {}
    parameters = {}
    for dataset in config.datasets:
        # shape: N_h x N
        # i.e. #DOFs x #snapshots
        snapshots[dataset] = utils.load_snapshots(dataset)
        parameters[dataset] = utils.load_parameters(dataset)

    for component in config.components:
        print(f'\nComputing targets for component {component}')

        for dataset in config.datasets:
            # Snapshot matrix, non-centered
            S_n = utils.reduce(snapshots[dataset], component)

            if dataset == 'train':
                # Compute and store ..
                # .. mean and POD
                S_mean = np.mean(S_n, axis=1)
                S = _center_columns(S_n, S_mean)
                V, D = do_POD(S)
                utils.save_POD(V, D, S_mean, component)
                # .. scaler
                scaler = StandardScaler()
                scaler.fit(parameters[dataset])
                utils.save_scaler(scaler)
            else:
                # Centered snapshot matrix, using the train mean
                S = _center_columns(S_n, S_mean)

            # Now V, D, S_mean and scaler are available
            # Compute and store ..
            # .. features
            features = compute_features(scaler, parameters[dataset])
            utils.save_features(dataset, features)
            # .. targets
            targets = compute_targets(S, V, D)
            utils.save_targets(dataset, component, targets)
            # .. projection error
            err_POD_sq = compute_error_POD_sq(S, V, D)
            utils.save_error_POD_sq(dataset, component, err_POD_sq)
def main():
    """Create the feature file ``myData.h5py`` unless it already exists.

    When the file is missing, both folder sets (training and evaluation) are
    passed through ``stereo_to_mono`` and ``compress``, features are read
    from the resulting folders with ``get_features``, each ground-truth/input
    pair is shuffled with ``unison_shuffled_copies``, and everything is
    written with ``save_features``.
    """
    features_file = 'myData.h5py'
    if not os.path.exists(features_file):
        # prepare the data: (stereo source, ground truth, input) per set
        folder_sets = (
            (stereo_folder, groundtruth_folder, input_folder),
            (eval_stereo_folder, eval_groundtruth_folder, eval_input_folder),
        )
        for stereo_dir, target_dir, degraded_dir in folder_sets:
            stereo_to_mono(stereo_dir, target_dir)
            compress(target_dir, degraded_dir)

        # extract features (the second return value is not used)
        train_target, _ = get_features(groundtruth_folder)
        train_input, _ = get_features(input_folder)
        eval_target, _ = get_features(eval_groundtruth_folder)
        eval_input, _ = get_features(eval_input_folder)

        # shuffle features, keeping each target/input pair aligned
        train_target, train_input = unison_shuffled_copies(
            train_target, train_input)
        eval_target, eval_input = unison_shuffled_copies(
            eval_target, eval_input)

        # save features
        save_features(features_file, train_input, eval_input,
                      train_target, eval_target)
split_tag.append(i) if len(parsed_words) - 1 not in split_tag: split_tag.append(len(parsed_words) - 1) sent_sub = [] for i in range(1, len(split_tag)): st, ed = split_tag[i - 1], split_tag[i] sent_sub.append(' '.join(parsed_words[st + 1:ed + 1]).strip()) ss_data = [] te = 0 for ss in sent_sub: time_elapsed, parsed_sents, segments = compSP.predict_batch_prob(ss, ref, args.max_token, args.min_word, scaling=0, printout=False) ss_data.append((time_elapsed, parsed_sents, segments)) te += time_elapsed te /= len(sent_sub) sent_data.append(ss_data) sum_elapsed_time += te sum_sent += 1 tbar.set_description('[{}/{} sentence] avg. time/sent: {:.3f}'.format(si, len(doc), sum_elapsed_time / sum_sent)) doc_data.append((name, sent_data)) if (di + 1) % args.save_freq == 0 or (di + 1) == len(texts): text_format = '{}{:05d}.pkl'.format(args.split, save_id) output_path = os.path.join(output_dir, text_format) save_features(output_path, doc_data) doc_data = [] save_id += 1
del tr_titles, tr_desc, X_train gc.collect() X_va = hstack([csr_matrix(X_val.drop(columns_to_drop, axis=1)), va_titles, va_desc]) y_va = X_val['deal_probability'] del va_titles, va_desc, X_val gc.collect() X_te = hstack([csr_matrix(X_test.drop(columns_to_drop, axis=1)), te_titles, te_desc]) del te_titles, te_desc, X_test gc.collect() if nrows is None: utils.save_features(X_tr, xgb_root, "X_train") utils.save_features(X_va, xgb_root, "X_val") utils.save_features(X_te, xgb_root, "test") utils.save_features(y_tr, xgb_root, "y_train") utils.save_features(y_va, xgb_root, "y_val") elif args.feature == "load": print("[+] Load features ") X_tr = utils.load_features(xgb_root, "X_train").any() X_va = utils.load_features(xgb_root, "X_val").any() X_te = utils.load_features(xgb_root, "test").any() y_tr = utils.load_features(xgb_root, "y_train") y_va = utils.load_features(xgb_root, "y_val") print("[+] Done ") X = vstack([X_tr, X_va]) y = np.concatenate((y_tr, y_va))
csr_matrix(data_va.drop(columns_to_drop, axis=1)), va_titles, va_desc ]) y_va = data_va['deal_probability'] del va_titles, va_desc, data_va gc.collect() X_te = hstack([ csr_matrix(data_te.drop(columns_to_drop, axis=1)), te_titles, te_desc ]) del te_titles, te_desc, data_te gc.collect() ################################################################################ # if nrows is None: utils.save_features(X_tr, lgbm_dir, "X_train") utils.save_features(X_va, lgbm_dir, "X_val") utils.save_features(X_te, lgbm_dir, "test") utils.save_features(y_tr, lgbm_dir, "y_train") utils.save_features(y_va, lgbm_dir, "y_val") ################################################################################ elif args.feature == "load": print("[+] Load features ") X_tr = utils.load_features(lgbm_dir, "X_train").any() X_va = utils.load_features(lgbm_dir, "X_val").any() X_te = utils.load_features(lgbm_dir, "test").any() y_tr = utils.load_features(lgbm_dir, "y_train") y_va = utils.load_features(lgbm_dir, "y_val") print("[+] Done ") X = vstack([X_tr, X_va]) y = np.concatenate((y_tr, y_va))
for i, split in enumerate(splits): print(input_path + '/{}*'.format(split)) files = sorted(glob.glob(input_path + '/{}*'.format(split))) print('{} files are found'.format(len(files))) filename = os.path.join(output_path, split + '.pkl') filename_stats = os.path.join(output_path, split + '_stats.pkl') if not os.path.exists(filename): st_time = time.time() # merge data based on filtering rule data_all, full_sent_pos, seg_num_doc = data_processing( files, args.seg_per_sent, args.debug) save_features(filename, data_all) save_features(filename_stats, [full_sent_pos, seg_num_doc]) print('total num. sentences', len(full_sent_pos)) print('elapsed time: {:.3f}s'.format(time.time() - st_time)) else: data_all = load_features(filename) full_sent_pos, seg_num_doc = load_features(filename_stats) print('data is loaded from {} and {}'.format( filename, filename_stats)) full_sent_pos_list.append(full_sent_pos) print_stats(seg_num_doc, '{}-seg_num_doc'.format(split)) # draw data stats draw_stats(full_sent_pos_list, splits, data_name)
def main():
    """Build the TF-IDF + tabular feature matrix and save it with the targets.

    Reads the train/test CSV paths and the output root from ``config.json``,
    derives price, date, label-encoded and merged-text features on the
    concatenated frame, stacks the TF-IDF matrix with the tabular columns and
    writes ``X_train_xgboost`` and ``y_train_xgboost`` through
    ``utils.save_features``.
    """
    # Local import: used to turn the dense tabular block into a sparse one so
    # that it can be stacked next to the TF-IDF matrix.
    from scipy.sparse import csr_matrix

    # Load json config
    with open("config.json") as config_file:
        config = json.load(config_file)

    print("[+] Load csv ...")
    train_df = load_csv(config["train_csv"])
    test_df = load_csv(config["test_csv"])
    # Take the targets before train_df is deleted; reading them after the
    # `del` (as the code used to) raises NameError.
    train_targets = train_df["deal_probability"].values
    df = pd.concat([train_df, test_df])
    del train_df
    del test_df
    gc.collect()

    print("[+] Log price ...")
    df["price"] = df["price"].apply(np.log1p)
    # log1p(-1) is -inf; map it to the sentinel value -1.
    df["price"] = df["price"].replace(-np.inf, -1)

    print("[+] Create time features ...")
    activation = df["activation_date"].dt
    df["mon"] = activation.month
    df["mday"] = activation.day
    # NOTE: Series.dt.week was removed in pandas 2.0; use
    # .dt.isocalendar().week when the project moves to a newer pandas.
    df["week"] = activation.week
    df["wday"] = activation.weekday

    cat_vars = ["category_name", "parent_category_name", "region", "user_type"]
    print("[+] Label categories ...")
    for cat in cat_vars:
        df[cat] = LabelEncoder().fit_transform(df[cat].values)

    txt_vars = [
        "city", "param_1", "param_2", "param_3", "title", "description"
    ]
    print("[+] Merge text ...")
    for txt in txt_vars:
        df[txt] = df[txt].astype("str")
    # NOTE(review): the columns are concatenated without a separator, so the
    # last word of one field fuses with the first word of the next - confirm
    # this is intended.
    df["txt"] = ""
    for txt in txt_vars:
        df["txt"] += df[txt]

    delete_columns = [
        "item_id", "user_id", "city", "param_1", "param_2", "param_3", "title",
        "description", "activation_date", "image"
    ]
    print("[+] Delete unused columns ...")
    df = df.drop(delete_columns, axis=1)

    print("[+] Extract TFIDF ...")
    df["txt"] = df["txt"].str.lower()
    # Python's `re` has no POSIX classes, so "[^[:alpha:]]" did not match
    # "any non-letter".  [\W\d_] is the Unicode-aware equivalent.
    df["txt"] = df["txt"].replace(r"[\W\d_]+", " ", regex=True)
    df["txt"] = df["txt"].replace(r"\s+", " ", regex=True)
    tfidf_vec = TfidfVectorizer(ngram_range=(1, 3),
                                sublinear_tf=True,
                                stop_words=stopWords_rus,
                                max_features=5500)
    full_tfidf = tfidf_vec.fit_transform(df['txt'].values.tolist())

    extract_columns = [
        'region', 'parent_category_name', 'category_name', 'price',
        'item_seq_number', 'user_type', 'image_top_1', 'mon', 'mday', 'week',
        'wday'
    ]
    print("[+] Stack more features ...")
    # One 2-D block with a column per feature.  Stacking the 1-D columns one
    # by one made scipy treat each of them as a single row, which does not
    # match the TF-IDF row count.
    tabular = csr_matrix(df[extract_columns].values.astype(np.float64))
    full_tfidf = hstack([full_tfidf, tabular]).tocsr()

    print("[+] Create y_train ...")
    y_train = np.asarray(train_targets)

    extracted_features_root = config["extracted_features"]
    # NOTE(review): full_tfidf holds the rows of train AND test (in that
    # order) while y_train only covers train - confirm the consumer slices it.
    utils.save_features(full_tfidf,
                        root=extracted_features_root,
                        name="X_train_xgboost")
    utils.save_features(y_train,
                        root=extracted_features_root,
                        name="y_train_xgboost")
X_train, y_train, X_test, y_test, num_classes = dataset.load_Iris(0.3) # model setup layers = [Vector(args.layers, eta=args.eta, eps=args.eps)] model = Architecture(layers, model_dir, num_classes) # train/test pass print("Forward pass - train features") Z_train = model(X_train, y_train) utils.save_loss(model.loss_dict, model_dir, "train") print("Forward pass - test features") Z_test = model(X_test) utils.save_loss(model.loss_dict, model_dir, "test") # save features utils.save_features(model_dir, "X_train", X_train, y_train) utils.save_features(model_dir, "X_test", X_test, y_test) utils.save_features(model_dir, "Z_train", Z_train, y_train) utils.save_features(model_dir, "Z_test", Z_test, y_test) # evaluation train _, acc_svm = evaluate.svm(Z_train, y_train, Z_train, y_train) acc_knn = evaluate.knn(Z_train, y_train, Z_train, y_train, k=5) acc_svd = evaluate.nearsub(Z_train, y_train, Z_train, y_train, n_comp=1) acc = {"svm": acc_svm, "knn": acc_knn, "nearsub-svd": acc_svd} utils.save_params(model_dir, acc, name="acc_train.json") # evaluation test _, acc_svm = evaluate.svm(Z_train, y_train, Z_test, y_test) acc_knn = evaluate.knn(Z_train, y_train, Z_test, y_test, k=5) acc_svd = evaluate.nearsub(Z_train, y_train, Z_test, y_test, n_comp=1)
skf = KFold(n_folds) for fold, (train_index, val_index) in enumerate(skf.split(X)): print(f"\n[+] Fold {fold}") if fold in skip_fold: print(f"[+] Fold {fold} is skipped") continue X_train = X[train_index] y_train = y[train_index] X_valid = X[val_index] y_valid = y[val_index] # Save val index and test index to file utils.save_features(np.asarray(train_index), lgb_root, f"train_index_fold_{fold}") utils.save_features(np.asarray(val_index), lgb_root, f"val_index_fold_{fold}") print("Light Gradient Boosting Regressor") lgbm_params = { 'task': 'train', 'boosting_type': 'gbdt', 'objective': 'regression', 'metric': 'rmse', 'max_depth': 15, 'num_leaves': 35, 'feature_fraction': 0.7, 'bagging_fraction': 0.8, # 'bagging_freq': 5, 'learning_rate': 0.019,
def run_predict(args,
                model,
                tokenizer,
                logger,
                batch_size,
                cuda_dev,
                is_sim_running=False,
                is_run_force=False):
    """Run (or skip) prediction for a DUC/TAC split and store the results.

    The documents are read with ``readDUCorTACText`` from
    ``<base_path>/<corpus dir>/<data_type>/<train|test>``.  Labels, names and
    positions are always saved; predictions are computed and saved only when
    the prediction file is missing or ``is_run_force`` is set.

    Args:
        args: parsed options; reads ``dataset`` (0 = DUC, 1 = TAC), ``split``,
            ``base_path``, ``data_type``, ``data_start``, ``data_end`` and
            ``DUC_data_path`` / ``TAC_data_path``.
        model: model handed to ``predict_sim`` / ``predict_imp``.
        tokenizer: tokenizer handed to ``predict_sim`` / ``predict_imp``.
        logger: object with a ``write(str)`` method.
        batch_size: batch size handed to the prediction function.
        cuda_dev: device argument handed to the prediction function.
        is_sim_running: use ``predict_sim`` instead of ``predict_imp``; the
            pooled vectors are only produced and saved when this is False.
        is_run_force: recompute even if the prediction file already exists.

    Raises:
        ValueError: if ``args.dataset`` is neither 0 nor 1.
    """
    if args.dataset == 0:  # DUC
        corpus_paths, is_duc = args.DUC_data_path, True
    elif args.dataset == 1:  # TAC
        corpus_paths, is_duc = args.TAC_data_path, False
    else:
        # Previously this fell through and failed later on an unbound local.
        raise ValueError(
            'unsupported dataset id: {!r} (expected 0 for DUC or 1 for TAC)'.
            format(args.dataset))

    corpus_base = os.path.dirname(corpus_paths[0])
    subset = 'train' if args.split == 'train' else 'test'
    data_path = os.path.join(args.base_path, corpus_base, args.data_type,
                             subset)
    # Summaries are read from the same directory as the documents.
    text_cls = readDUCorTACText(data_path,
                                sum_path=data_path,
                                is_duc=is_duc,
                                data_st=args.data_start,
                                data_en=args.data_end)

    BERT_base_dir = os.path.join(data_path, 'BERT_features', 'extractions')
    # exist_ok avoids the check-then-create race between parallel runs.
    os.makedirs(BERT_base_dir, exist_ok=True)

    # retrieve text data
    text_docs = text_cls.text
    Y = text_cls.Y
    name = text_cls.name
    pos = text_cls.pos
    seg_pos = text_cls.seg

    y_name_pos_file = os.path.join(
        BERT_base_dir,
        '{}_y_name_pos_{}-{}.pkl'.format(args.split, args.data_start,
                                         args.data_end))
    save_features(y_name_pos_file, {'Y': Y, 'name': name, 'pos': pos})
    logger.write('docs files are saved in {}.'.format(y_name_pos_file))

    pred_fn = '{}_{}_{}-{}'.format(args.split,
                                   'sim' if is_sim_running else 'imp',
                                   args.data_start, args.data_end)
    pred_file = os.path.join(BERT_base_dir, pred_fn)

    pool_fn = '{}_{}_{}-{}'.format(args.split, 'imp_vector', args.data_start,
                                   args.data_end)
    # Pooled vectors only exist for the "imp" prediction mode.
    pool_file = None
    if not is_sim_running:
        pool_file = os.path.join(BERT_base_dir, pool_fn)

    if not os.path.exists(pred_file) or is_run_force:
        st_ext = time.time()
        if is_sim_running:
            pred_list = predict_sim(model, tokenizer, text_docs, 128,
                                    batch_size, cuda_dev, name, seg_pos)
        else:
            pred_list, pool_list = predict_imp(model, tokenizer, text_docs,
                                               pos, 512, batch_size, cuda_dev)
        elapsed_time = time.time() - st_ext
        n_docs = len(text_docs)
        # Guard the average so an empty document list does not raise.
        avg_time = elapsed_time / n_docs if n_docs else 0.0
        logger.write('prediction time for {} is {}sec: avg. {}sec/doc.'.format(
            data_path, elapsed_time, avg_time))

        save_features(pred_file, pred_list)
        logger.write('{} file stored!'.format(pred_file))
        if not is_sim_running:
            save_features(pool_file, pool_list)
            logger.write('{} file stored!'.format(pool_file))
    else:
        logger.write('{} file exists... skip prediction!'.format(pred_file))
        if not is_sim_running:
            logger.write(
                '{} file exists... skip prediction!'.format(pool_file))
cls_scores = cls_prob[:, cls_ind] dets = np.hstack((boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = np.array(cpu_nms(dets, cfg.TEST.NMS)) max_conf[keep] = np.where(cls_scores[keep] > max_conf[keep], cls_scores[keep], max_conf[keep]) keep_boxes = np.where(max_conf >= CONF_THRESH)[0] if len(keep_boxes) < MIN_BOXES: keep_boxes = np.argsort(max_conf)[::-1][:MIN_BOXES] elif len(keep_boxes) > MAX_BOXES: keep_boxes = np.argsort(max_conf)[::-1][:MAX_BOXES] image_feat = pooled_feat[keep_boxes] if args.save_boxes: image_bboxes = boxes[keep_boxes] all_images_boxes.append(image_bboxes) else: all_images_boxes = None all_images_feats.append(image_feat) #torch.cuda.empty_cache() #transforms array to numpy array all_images_feats = np.asarray(all_images_feats, dtype=np.float16) if args.save_boxes: all_images_boxes = np.asarray(all_images_boxes, dtype=np.float16) #save the files output_file = os.path.join(args.output_dir, args.out_file_name+'.npy') save_features(output_file, all_images_feats, all_images_boxes)
# # save features for VGG16 at 3 different input scales
# from keras.applications.vgg16 import VGG16
# from keras.applications.vgg16 import preprocess_input
# model = VGG16(weights='imagenet', include_top=False)
#
# for n in [224,128,64]:
#     input_shape = (n,n,3)
#     new_preprocess = lambda x: preprocess_input(utils.pad_image(x, input_shape))
#     features = utils.features_from_image(all_logos, model, new_preprocess)
#     utils.save_features('vgg16_logo_features_{}.hdf5'.format(n), features, brand_map, input_shape)

# Save features extracted with NASNetMobile at 224x224 input.
from keras.applications.nasnet import NASNetMobile
from keras.applications.nasnet import preprocess_input

model_out = NASNetMobile(weights='imagenet', include_top=False)
input_shape = (224, 224, 3)


def new_preprocess(x):
    """Pad the image to ``input_shape`` and apply the NASNet preprocessing."""
    return preprocess_input(utils.pad_image(x, input_shape))


# Use the NASNet network built above.  The call used to receive `model`,
# leaving `model_out` unused, so the file named "NASNet_..." would have held
# features from whichever network `model` referred to.
features = utils.features_from_image(all_logos, model_out, new_preprocess)
utils.save_features('NASNet_logo_features_{}.hdf5'.format(224), features,
                    brand_map, input_shape)

# from keras.applications.nasnet import NASNetLarge
# from keras.applications.nasnet import preprocess_input
# model_out = NASNetLarge(weights='imagenet', include_top=False)
# input_shape = (331, 331, 3)
#
# new_preprocess = lambda x: preprocess_input(utils.pad_image(x, input_shape))
# features = utils.features_from_image(all_logos, model_out, new_preprocess)
# utils.save_features('NASNet_logo_features_{}.hdf5'.format(331), features, brand_map, input_shape)
def main():
    """Extract numeric, categorical and TF-IDF features and save them.

    Reads the train/test CSV paths and the output root from ``config.json``,
    tokenizes both frames, derives date, text and price features on the
    concatenated frame, splits every feature block back into its train and
    test rows (train comes first) and writes each block, the targets and the
    token lengths through ``utils.save_features``.
    """
    # Load json config
    with open("config.json") as config_file:
        config = json.load(config_file)

    with utils.timer("Load csv"):
        print("[+] Load csv ...")
        train_df = load_csv(config["train_csv"])
        test_df = load_csv(config["test_csv"])

    with utils.timer("Create token"):
        print("[+] Create token ...")
        token = create_token(train_df)

    with utils.timer("Tokenize data"):
        print("[+] Tokenize data ...")
        train_token_data = tokenize_data(train_df, token)
        test_token_data = tokenize_data(test_df, token)

    # `.values` replaces Series.as_matrix(), which was removed in pandas 1.0.
    y_train = train_df["deal_probability"].values
    train_df = train_df.drop("deal_probability", axis=1)
    df = pd.concat([train_df, test_df], ignore_index=True)
    # Rows [0, n_train) of every block below belong to the train set.
    n_train = len(train_df)
    del train_df
    del test_df
    gc.collect()

    with utils.timer("Extract time features"):
        print("[+] Convert date to day of week ...")
        df = date_to_dow(df)

    with utils.timer("Extract text features as numeric"):
        print("[+] Extract text features as numeric ...")
        df['text_feat'] = df.apply(lambda row: ' '.join(
            [str(row['param_1']),
             str(row['param_2']),
             str(row['param_3'])]),
                                   axis=1)  # Group Param Features
        df = extract_text_features_as_numeric(df)

    with utils.timer("Extract params text features"):
        print("[+] Extract params text features ...")
        param_tfidf = extract_params_tex_features(df)

    with utils.timer("Extract title features"):
        print("[+] Extract title features ...")
        df, title_tfidf = title_features(df)

    with utils.timer("Extract description features"):
        print("[+] Extract description features ...")
        df, description_tfidf = description_features(df)

    with utils.timer("Extract price features"):
        print("[+] Extract price features ...")
        df = log_prices(df)

    print("[+] Extract numerical features ...")
    # Numeric data: one row per column first, transposed to one row per item.
    X_num = np.array([df[c].values for c in num_columns], dtype=np.float32).T
    X_train_num = X_num[:n_train]
    X_test_num = X_num[n_train:]
    print(f"[+] Numeric {X_train_num.shape}/{X_test_num.shape}")
    del X_num
    gc.collect()

    # Categorical data.  The builtin `int` is what the removed alias `np.int`
    # (gone since NumPy 1.24) stood for.
    X_train_cat = np.array(train_token_data, dtype=int).T
    X_test_cat = np.array(test_token_data, dtype=int).T
    print(f"[+] Cat {X_train_cat.shape}/{X_test_cat.shape}")

    X_train_desc = description_tfidf[:n_train]
    X_test_desc = description_tfidf[n_train:]
    print(f"[+] Description {X_train_desc.shape}/{X_test_desc.shape}")

    X_train_title = title_tfidf[:n_train]
    X_test_title = title_tfidf[n_train:]
    print(f"[+] Title {X_train_title.shape}/{X_test_title.shape}")

    X_train_param = param_tfidf[:n_train]
    X_test_param = param_tfidf[n_train:]
    print(f"[+] Param {X_train_param.shape}/{X_test_param.shape}")

    print("[+] Save features ...")
    y_train = np.asarray(y_train)

    # Save token len
    token_len = [len(t) for t in token]

    extracted_features_root = config["extracted_features"]
    # (name on disk, data) in the order in which the files are written.
    outputs = [
        ("X_train_num", X_train_num),
        ("X_test_num", X_test_num),
        ("X_train_cat", X_train_cat),
        ("X_test_cat", X_test_cat),
        ("X_train_desc", X_train_desc),
        ("X_test_desc", X_test_desc),
        ("X_train_title", X_train_title),
        ("X_test_title", X_test_title),
        ("X_train_param", X_train_param),
        ("X_test_param", X_test_param),
        ("y_train", y_train),
        ("token_len", np.asarray(token_len)),
    ]
    for feature_name, feature_data in outputs:
        utils.save_features(feature_data,
                            root=extracted_features_root,
                            name=feature_name)
feature_batch_counter = 0 tbar_batch_counter = 0 for i, data in enumerate(dataloader, 0): # if save_features, save at the beginning of an epoch if opt.feature_save and epoch % opt.feature_save_every == 0 and feature_batch_counter < opt.feature_num_batches: if len(feature_batches) < opt.feature_num_batches: eval_x, eval_y = data eval_x = eval_x.cuda() feature_batches.append((eval_x, eval_y)) # feature for real eval_x, eval_y = feature_batches[feature_batch_counter] with torch.no_grad(): eval_f = netD.get_feature(eval_x) utils.save_features( eval_f.cpu().numpy(), os.path.join( outff, f'real_epoch_{epoch}_batch_{feature_batch_counter}_f.npy')) utils.save_features( eval_y.cpu().numpy(), os.path.join( outff, f'real_epoch_{epoch}_batch_{feature_batch_counter}_y.npy')) # feature for fake with torch.no_grad(): eval_x = netG(feature_eval_noises[feature_batch_counter], feature_eval_labels[feature_batch_counter]) eval_y = feature_eval_labels[feature_batch_counter] eval_f = netD.get_feature(eval_x) utils.save_features( eval_f.cpu().numpy(),
boxes /= im_scales[0] cls_prob = cls_prob.data.cpu().numpy().squeeze() pooled_feat = pooled_feat.data.cpu().numpy() # Keep only the best detections. max_conf = np.zeros((boxes.shape[0])) for cls_ind in range(1, cls_prob.shape[1]): cls_scores = cls_prob[:, cls_ind] dets = np.hstack( (boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = np.array(cpu_nms(dets, cfg.TEST.NMS)) max_conf[keep] = np.where(cls_scores[keep] > max_conf[keep], cls_scores[keep], max_conf[keep]) keep_boxes = np.where(max_conf >= CONF_THRESH)[0] if len(keep_boxes) < MIN_BOXES: keep_boxes = np.argsort(max_conf)[::-1][:MIN_BOXES] elif len(keep_boxes) > MAX_BOXES: keep_boxes = np.argsort(max_conf)[::-1][:MAX_BOXES] image_feat = pooled_feat[keep_boxes] if args.save_boxes: image_bboxes = boxes[keep_boxes] else: image_bboxes = None output_file = os.path.join(args.output_dir, im_file.split('.')[0] + '.npy') save_features(output_file, image_feat, image_bboxes) #torch.cuda.empty_cache()
def compute_flfeats_offline(source_path,
                            out_dir,
                            feat_type,
                            deltas=None,
                            config_file=None):
    """Compute frame-level features for every wav file and save them.

    One feature file is saved per utterance through ``utils.save_features``,
    and the configuration file is copied next to it with ``utils.copy_conf``.

    Args:
        source_path (string): Path to the wavs.
        out_dir (string): Base output directory. Features are saved under
            ``<out_dir>/<feat_type>/<basename of source_path>/``.
        feat_type (string): Type of the frame-level feature to extract from
            the utterances. Choose from: 'mfcc', 'fbanks', 'melspec'.
        deltas (int, optional): Order of the delta coefficients appended to
            the features along dimension 1. '1' for first order derivative,
            '2' for first and second order. ``None`` or '0' for not using
            deltas. Default: None.
        config_file (string): Path to the configuration file (ini).

    Raises:
        ValueError: if ``deltas`` is not one of None, 0, 1 or 2.
    """
    # None is documented as "no deltas" but used to match none of the
    # branches, so nothing was computed with the default arguments.
    order = 0 if deltas is None else deltas
    if order not in (0, 1, 2):
        raise ValueError(
            "deltas must be None, 0, 1 or 2, got {!r}".format(deltas))

    list_wavs = utils.get_files_abspaths(path=source_path, file_type='.wav')
    # frame-level feats params/config from the config file
    params = utils.read_conf_file(file_name=config_file,
                                  conf_section='DEFAULTS')
    print("Computing {} for {} utterances in {}...".format(
        feat_type, len(list_wavs), source_path))

    # The destination does not depend on the utterance.
    final_dir = out_dir + '/{0}/{1}/'.format(feat_type,
                                             os.path.basename(source_path))

    for wav_file in list_wavs:
        # Load wav
        waveform = utils.load_wav_torch(wav_file,
                                        max_length_in_seconds=5,
                                        pad_and_truncate=True)
        # Compute features
        feat = execute_extraction_function(feat_type=feat_type,
                                           waveform=waveform,
                                           **params)
        if order:
            # Each delta is computed from the previous entry: 1st order from
            # the features, 2nd order from the 1st-order deltas.
            stacked = [feat]
            for _ in range(int(order)):
                stacked.append(
                    torchaudio.functional.compute_deltas(stacked[-1]))
            feat = torch.cat(stacked, 1)

        utils.save_features(final_dir, feat_type, wav_file, feat)
        utils.copy_conf(config_file, final_dir, feat_type)
pooled_feat = fasterRCNN(im_data, im_info, gt_boxes, num_boxes) boxes = rois.data.cpu().numpy()[:, :, 1:5].squeeze() boxes /= im_scales[0] cls_prob = cls_prob.data.cpu().numpy().squeeze() pooled_feat = pooled_feat.data.cpu().numpy() # Keep only the best detections. max_conf = np.zeros((boxes.shape[0])) for cls_ind in range(1, cls_prob.shape[1]): cls_scores = cls_prob[:, cls_ind] dets = np.hstack((boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = np.array(cpu_nms(dets, cfg.TEST.NMS)) max_conf[keep] = np.where(cls_scores[keep] > max_conf[keep], cls_scores[keep], max_conf[keep]) keep_boxes = np.where(max_conf >= CONF_THRESH)[0] if len(keep_boxes) < MIN_BOXES: keep_boxes = np.argsort(max_conf)[::-1][:MIN_BOXES] elif len(keep_boxes) > MAX_BOXES: keep_boxes = np.argsort(max_conf)[::-1][:MAX_BOXES] image_feat = pooled_feat[keep_boxes] if args.save_boxes: image_bboxes = boxes[keep_boxes] else: image_bboxes = None output_file = os.path.join(args.output_dir, im_file.split('.')[0]+'.npy') save_features(output_file, image_feat, image_bboxes, keep_boxes, cls_prob) #torch.cuda.empty_cache()
def __call__(self, sample, wav_file, name_set):
    """Return the frame-level features of ``sample`` together with its label.

    If ``<out_dir>//<feat_type>_<wav name>`` is an existing file it is loaded
    with ``np.load``.  Otherwise the features are extracted from the waveform
    and, for ``deltas`` 1 or 2, concatenated with their delta coefficients
    along dimension 1.  When ``self.save`` is set they are also saved under
    ``<out_dir>/<feat_type>/<name_set>/``.

    Returns:
        ``{'feature': ..., 'label': ...}``, or ``None`` when nothing is cached
        and ``self.deltas`` is not 0, 1 or 2.
    """
    waveform, label = sample['wave'], sample['label']
    should_save = self.save
    conf_path = self.config_file
    delta_order = self.deltas
    base_dir = self.out_dir

    # frame-level feats params/config
    params = utils.read_conf_file(file_name=conf_path,
                                  conf_section='DEFAULTS')

    # NOTE(review): this path is built from out_dir alone, while features are
    # saved below under out_dir/<feat_type>/<name_set>/ - confirm that the
    # cache lookup can actually hit.
    stem = os.path.splitext(os.path.basename(wav_file))[0]
    cached_path = base_dir + '/' + '/{0}_{1}'.format(self.feat_type, stem)

    # if features exist, then LOAD them
    if os.path.isfile(cached_path):
        return {'feature': np.load(cached_path), 'label': label}

    # Number of delta orders to append; anything else yields no result.
    supported = [k for k in (0, 1, 2) if delta_order == k]
    if not supported:
        return None
    n_deltas = supported[0]

    # Compute features
    feat = execute_extraction_function(feat_type=self.feat_type,
                                       waveform=waveform,
                                       **params)
    if n_deltas == 0:
        save_name = wav_file
    else:
        # 1st order is derived from the features, 2nd order from the 1st.
        pieces = [feat]
        while len(pieces) <= n_deltas:
            pieces.append(torchaudio.functional.compute_deltas(pieces[-1]))
        feat = torch.cat(tuple(pieces), 1)
        save_name = '{0}_{1}del'.format(wav_file, delta_order)

    # Save features if asked for
    target_dir = base_dir + '/{0}/{1}/'.format(self.feat_type, name_set)
    if should_save:
        utils.save_features(target_dir, self.feat_type, save_name, feat)
        # NOTE(review): the config copy is assumed to belong to the save
        # branch - confirm.
        utils.copy_conf(conf_path, target_dir, self.feat_type)

    return {'feature': feat, 'label': label}
avg_loss_IQ = AverageMeter() feature_batch_counter = 0 for i, data in enumerate(dataloader, 0): # if save_features, save at the beginning of an epoch if opt.feature_save and epoch % opt.feature_save_every == 0 and feature_batch_counter < opt.feature_num_batches: if len(feature_batches) < opt.feature_num_batches: eval_x, eval_y = data eval_x = eval_x.cuda() feature_batches.append((eval_x, eval_y)) # feature for real eval_x, eval_y = feature_batches[feature_batch_counter] with torch.no_grad(): eval_f = netD.get_feature(eval_x) utils.save_features( eval_f.cpu().numpy(), os.path.join( outff, f'real_epoch_{epoch}_batch_{feature_batch_counter}_f.npy')) utils.save_features( eval_y.cpu().numpy(), os.path.join( outff, f'real_epoch_{epoch}_batch_{feature_batch_counter}_y.npy')) # feature for fake with torch.no_grad(): eval_x = netG(feature_eval_noises[feature_batch_counter], feature_eval_labels[feature_batch_counter]) eval_y = feature_eval_labels[feature_batch_counter] eval_f = netD.get_feature(eval_x) utils.save_features( eval_f.cpu().numpy(),
features = utils.features_from_image(all_logos, model, my_preprocess) return features, all_logos, brand_map if __name__ == '__main__': model, preprocess_input, input_shape = utils.load_extractor_model('InceptionV3', flavor=0) my_preprocess = lambda x: preprocess_input(utils.pad_image(x, input_shape)) print('Extracting features from LogosInTheWild database (train set) - this will take a while (~5 minutes)') features, all_logos, brand_map = extract_litw_features('data_all_train.txt', model, my_preprocess) print('Processed {} logos, transformed into feature vectors'.format(len(features))) # save inception features at default size 299*299 utils.save_features('./model_poi/inception_logo_features.hdf5', features, brand_map, input_shape) # save features for Inception with smaller input: 200 instead of 299 - last layer is 4*4 instead of 8*8 # Extract features at last layer as well as after last 3 inception blocks (mixed9,8,7) input_shape = (200,200,3) new_preprocess = lambda x: preprocess_input(utils.pad_image(x, input_shape)) trunc_layer = [-1, 279, 248, 228] for i_layer in range(4): model_out = Model(inputs=model.inputs, outputs=model.layers[trunc_layer[i_layer]].output) features = utils.features_from_image(all_logos, model_out, new_preprocess) extra = '_trunc{}'.format(i_layer) if i_layer > 0 else '' utils.save_features('./model_poi/inception_logo_features_200{}.hdf5'.format(extra), features, brand_map, input_shape)
file_n = 'imp_vector.h5' if args.dataset == 2 and i == 3 else fn file_name = os.path.join(BERT_output_dir, file_n) files = sorted(glob.glob(pattern_)) print('found {} files for {}'.format(len(files), pattern_)) if i == 0: Y_data, name_data, pos_data = [], [], [] for file in files: data = load_features(file) # 'Y': Y, 'name': name, 'pos': pos Y_data = Y_data + data['Y'] name_data = name_data + data['name'] pos_data = pos_data + data['pos'] save_features(file_name, { 'Y': Y_data, 'name': name_data, 'pos': pos_data }) else: data_all = [] for file in files: data = load_features(file) data_all = data_all + data if args.dataset == 2 and i == 3: save_features_h5(file_name, data_all) else: save_features(file_name, data_all) print('saved in {}'.format(file_name)) # convert to mat file convert2mat(BERT_output_dir, is_force=args.is_force)
def extract_feature():
    """Extract pooled Faster R-CNN region features for every image.

    Parses the command line with ``parse_args``, loads the ResNet-101 based
    detector from ``args.model_file`` and, for each file in
    ``args.image_dir``, keeps between ``MIN_BOXES`` and ``MAX_BOXES`` regions
    ranked by their best per-class score after NMS.  The pooled features of
    the kept regions (and their boxes when ``args.save_boxes`` is set) are
    written to ``<args.output_dir>/<image name without extension>.npy`` via
    ``save_features``.  Files that OpenCV cannot read are skipped.
    """
    MIN_BOXES = 10
    MAX_BOXES = 100
    N_CLASSES = 1601
    CONF_THRESH = 0.2

    args = parse_args()
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    os.makedirs(args.output_dir, exist_ok=True)

    use_cuda = torch.cuda.is_available()
    assert use_cuda, 'Works only with CUDA'
    device = torch.device('cuda') if use_cuda else torch.device('cpu')
    cfg.CUDA = use_cuda
    np.random.seed(cfg.RNG_SEED)

    # Load the model.
    fasterRCNN = resnet(N_CLASSES, 101, pretrained=False)
    fasterRCNN.create_architecture()
    fasterRCNN.load_state_dict(torch.load(args.model_file))
    fasterRCNN.to(device)
    fasterRCNN.eval()
    print('Model is loaded.')

    # Load images.
    imglist = os.listdir(args.image_dir)
    num_images = len(imglist)
    print('Number of images: {}.'.format(num_images))

    # Extract features.
    for im_file in tqdm(imglist):
        im = cv2.imread(os.path.join(args.image_dir, im_file))
        if im is None:
            # cv2.imread returns None instead of raising for files it cannot
            # decode (sub-directories, non-image files, corrupt images).
            tqdm.write('Skipping unreadable image: {}.'.format(im_file))
            continue
        blobs, im_scales = get_image_blob(im)
        assert len(im_scales) == 1, 'Only single-image batch is implemented'

        im_data = torch.from_numpy(blobs).permute(0, 3, 1, 2).to(device)
        im_info = torch.tensor(
            [[blobs.shape[1], blobs.shape[2], im_scales[0]]]).to(device)
        gt_boxes = torch.zeros(1, 1, 5).to(device)
        num_boxes = torch.zeros(1).to(device)

        with torch.no_grad():
            rois, cls_prob, _, _, _, _, _, _, \
                pooled_feat = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

        boxes = rois.data.cpu().numpy()[:, :, 1:5].squeeze()
        # Divide by the scale returned by get_image_blob.
        boxes /= im_scales[0]
        cls_prob = cls_prob.data.cpu().numpy().squeeze()
        pooled_feat = pooled_feat.data.cpu().numpy()

        # Keep only the best detections: for every box remember the highest
        # score it reached in any class (index 0 is skipped) in which it
        # survived NMS.
        max_conf = np.zeros((boxes.shape[0]))
        for cls_ind in range(1, cls_prob.shape[1]):
            cls_scores = cls_prob[:, cls_ind]
            dets = np.hstack(
                (boxes, cls_scores[:, np.newaxis])).astype(np.float32)
            keep = np.array(cpu_nms(dets, cfg.TEST.NMS))
            max_conf[keep] = np.where(cls_scores[keep] > max_conf[keep],
                                      cls_scores[keep], max_conf[keep])

        keep_boxes = np.where(max_conf >= CONF_THRESH)[0]
        if len(keep_boxes) < MIN_BOXES:
            keep_boxes = np.argsort(max_conf)[::-1][:MIN_BOXES]
        elif len(keep_boxes) > MAX_BOXES:
            keep_boxes = np.argsort(max_conf)[::-1][:MAX_BOXES]

        image_feat = pooled_feat[keep_boxes]
        if args.save_boxes:
            image_bboxes = boxes[keep_boxes]
        else:
            image_bboxes = None

        # splitext strips only the final extension; split('.')[0] cut
        # "a.b.jpg" down to "a" and could make different images collide.
        output_file = os.path.join(args.output_dir,
                                   os.path.splitext(im_file)[0] + '.npy')
        save_features(output_file, image_feat, image_bboxes)