def __init__(self, scaled_calib_mtx, scaled_target_mtx, dist, config):
    self.angle = config['angle']
    self.height = config['height']
    self.res = config['resolution']
    self.fe_ext = fe.FeatureExtractor(self.angle, self.height, self.res,
                                      scaled_target_mtx, config['focal_length'])
    self.calib_mtx = scaled_calib_mtx
    self.target_mtx = scaled_target_mtx
    self.dist = dist

    self.img_area_px = self.res[0] * self.res[1]
    self.c_ar_thr = config['cont_area_thr']
    self.margin_offset = config['margin']
    self.left_mar, self.right_mar = self.margin_offset, self.res[0] - self.margin_offset
    self.up_mar, self.bot_mar = self.margin_offset, self.res[1] - self.margin_offset
    self.extent_thr = config['extent_thr']
    self.max_dist_thr = config['max_distance']

    all_classifiers = pickle.load(open(config['clf'], "rb"))
    # Filter the 'poly' key out; the remaining keys are numeric heights
    heights = [key for key in all_classifiers.keys() if type(key) != str]
    # Find the closest value among the available heights
    closest_height = min(heights, key=lambda x: abs(x - self.height))
    # All the available angles for the chosen height, as a list
    angles = list(all_classifiers[closest_height])
    # Find the closest value among the available angles
    closest_angle = min(angles, key=lambda x: abs(x - self.angle))
    self.clf = all_classifiers[closest_height][closest_angle]
    self.poly = all_classifiers['poly']
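# A sketch of the pickle layout the lookup above implies: numeric height keys map
# to dicts keyed by angle, plus a string 'poly' entry. The estimator types and the
# example heights/angles here are assumptions for illustration only.
import pickle
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import PolynomialFeatures

all_classifiers = {
    2.0: {15: LogisticRegression(), 30: LogisticRegression()},
    3.5: {15: LogisticRegression(), 30: LogisticRegression()},
    'poly': PolynomialFeatures(degree=2),
}
with open('clf.pcl', 'wb') as f:
    pickle.dump(all_classifiers, f)

# Nearest-value lookup, mirroring __init__ above
height, angle = 3.0, 20
heights = [k for k in all_classifiers if not isinstance(k, str)]
closest_height = min(heights, key=lambda x: abs(x - height))
closest_angle = min(all_classifiers[closest_height], key=lambda x: abs(x - angle))
print(closest_height, closest_angle)  # 3.5 15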
def extract_features_and_generate_model(essays, algorithm=util_functions.AlgorithmTypes.regression):
    """
    Feed in an essay set to get a feature vector and a classifier.
    essays must be an essay set object.
    Returns a trained FeatureExtractor object, a trained classifier,
    and the cross-validation error results.
    """
    f = feature_extractor.FeatureExtractor()
    f.initialize_dictionaries(essays)
    train_feats = f.gen_feats(essays)

    set_score = numpy.asarray(essays._score, dtype=numpy.int)
    # Use regression when there are many distinct scores, classification otherwise
    if len(util_functions.f7(list(set_score))) > 5:
        algorithm = util_functions.AlgorithmTypes.regression
    else:
        algorithm = util_functions.AlgorithmTypes.classification

    clf, clf2 = get_algorithms(algorithm)
    cv_error_results = get_cv_error(clf2, train_feats, essays._score)

    try:
        clf.fit(train_feats, set_score)
    except ValueError:
        log.exception("Not enough classes (0, 1, etc.) in sample.")
        set_score[0] = 1
        set_score[1] = 0
        clf.fit(train_feats, set_score)

    return f, clf, cv_error_results
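# Hypothetical usage sketch: assumes an EssaySet-style container with an
# add_essay(text, score) method, as used elsewhere in this project; the
# constructor call and the sample essays below are made up for illustration.
import essay_set

essays = essay_set.EssaySet()
essays.add_essay("A short essay about photosynthesis.", 2)
essays.add_essay("Another essay, somewhat longer and more detailed.", 4)

f, clf, cv_error_results = extract_features_and_generate_model(essays)
print(cv_error_results)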
def train_model(X_df, y_array, skf_is):
    fe = feature_extractor.FeatureExtractor()
    fe.fit(X_df, y_array)
    X_array = fe.transform(X_df)

    # Regression
    train_is, _ = skf_is
    X_train_array = np.array([X_array[i] for i in train_is])
    y_train_array = np.array([y_array[i] for i in train_is])
    reg = regressor.Regressor()
    reg.fit(X_train_array, y_train_array)
    return fe, reg
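# Usage sketch for the RAMP-style helper above: skf_is is a single
# (train_indices, test_indices) pair. The toy data and column names are made up,
# and fe.transform / reg.predict are assumed to behave as in the project's modules.
import numpy as np
import pandas as pd
from sklearn.model_selection import ShuffleSplit

X_df = pd.DataFrame({'x1': np.random.rand(100), 'x2': np.random.rand(100)})
y_array = np.random.rand(100)

skf_is = next(ShuffleSplit(n_splits=1, test_size=0.3, random_state=0).split(X_df))
fe, reg = train_model(X_df, y_array, skf_is)

_, test_is = skf_is
y_pred = reg.predict(fe.transform(X_df)[test_is])  # assumes transform returns an array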
def extract_limit_order_book(limit_order_filename, feature_filename, time_interval=100, n_level=10):
    extractor = feature_extractor.FeatureExtractor(
        limit_order_filename=limit_order_filename,
        feature_filename=feature_filename,
        time_interval=time_interval,
        n_level=n_level)
    timestamps, basic_set, time_insensitive_set, labels = extractor.extract_features()
    print("Order book {} has {} data points".format(
        limit_order_filename.split('/')[-1], len(labels)))
    return timestamps, basic_set, time_insensitive_set, labels
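# Usage sketch: the file paths below are hypothetical; the input format is
# whatever the project's feature_extractor.FeatureExtractor expects.
timestamps, basic_set, time_insensitive_set, labels = extract_limit_order_book(
    limit_order_filename='./data/order_book_sample.csv',
    feature_filename='./data/features_sample.json',
    time_interval=100,
    n_level=10)
print(len(timestamps), len(basic_set), len(time_insensitive_set))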
def main(args):
    args.dataset.data_dir = hydra_utils.to_absolute_path(args.dataset.data_dir)
    args.dataset.save_dir = hydra_utils.to_absolute_path(args.dataset.save_dir)
    args.model.load_path = hydra_utils.to_absolute_path(args.model.load_path)

    if os.path.exists(os.path.join(args.dataset.save_dir, 'alldata.pth')):
        print('Found {}'.format(
            os.path.join(args.dataset.save_dir, 'alldata.pth')))
        sys.exit(0)

    extractor = feature_extractor.FeatureExtractor(args)

    allimagelist = []
    allinstances = []
    # ALOI 1000 instances
    for i in range(1, 1001):
        dname = os.path.join(args.dataset.data_dir, '{}'.format(i))
        filenames = sorted(glob.glob(os.path.join(dname, '*')))
        for fname in filenames:
            allimagelist.append(fname)
            allinstances.append(i)
    print('Found {} images'.format(len(allimagelist)))

    # Write to temp files in expected format
    with tempfile.NamedTemporaryFile(delete=False) as tmpfile:
        tmpfilelist = tmpfile.name
    with open(tmpfilelist, 'w') as f:
        for fname in allimagelist:
            f.write(fname + '\n')

    savename = os.path.join(args.dataset.save_dir, 'alldata.pth')
    if not os.path.exists(savename):
        print('File does not exist: {}'.format(savename))

        # Extract Features
        args.dataset.filelist = tmpfilelist
        extractor.create_dataloader(args)
        os.makedirs(args.dataset.save_dir, exist_ok=True)
        feature_dict = extractor.extract_features()

        feat_size = len(feature_dict[allimagelist[0]])
        num_feat = len(allimagelist)
        catfeat = np.zeros((num_feat, feat_size))
        for fi, fname in enumerate(allimagelist):
            catfeat[fi, :] = feature_dict[fname]

        out = {
            'feat': catfeat,
            'instance': allinstances,
            'classes': allinstances,
        }
        torch.save(out, savename)
def train_submission(module_path, X_df, y_array, train_is):
    # Preparing the training set
    X_train_df = X_df.iloc[train_is]
    y_train_array = y_array[train_is]

    # Feature extraction
    import feature_extractor
    fe = feature_extractor.FeatureExtractor()
    fe.fit(X_train_df, y_train_array)
    X_train_array = fe.transform(X_train_df)

    import regressor
    reg = regressor.Regressor()
    reg.fit(X_train_array, y_train_array)
    return fe, reg
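# A sketch of the matching test-time step under the usual RAMP convention
# (not part of the original submission); trained_model is the (fe, reg) pair
# returned by train_submission above.
def test_submission(trained_model, X_df, test_is):
    X_test_df = X_df.iloc[test_is]
    fe, reg = trained_model
    X_test_array = fe.transform(X_test_df)
    return reg.predict(X_test_array)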
        frame_step += 1

    # close the video
    pbar.close()
    cap.release()
    cv2.destroyAllWindows()


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--fps', type=int, default=20,
                        help="frames per second")
    parser.add_argument('--train_filename', type=str, default="./data/train.mp4",
                        help="path to training video filename")
    parser.add_argument('--verbose', type=int, default=1,
                        help="Boolean (0, 1) whether to print variables")
    parser.add_argument('--max_frames', type=int, default=100,
                        help="max number of frames to analyse; set to -1 to process the whole video")
    parser.add_argument('--show_keypoints', type=int, default=0,
                        help="Boolean (0, 1) whether to show the video")
    parser.add_argument('--show_matches', type=int, default=1,
                        help="Boolean (0, 1) whether to show the matches between frames")
    parser.add_argument('--num_features', type=int, default=10,
                        help="number of features used to match frames")
    parser.add_argument('--nr_matches', type=int, default=20,
                        help="number of matches to use for tracking between frames")
    parser.add_argument('--width', type=int, default=640,
                        help="width of frames")
    parser.add_argument('--height', type=int, default=480,
                        help="height of frames")
    config, _ = parser.parse_known_args()

    config.verbose = bool(config.verbose)
    config.show_keypoints = bool(config.show_keypoints)
    config.show_matches = bool(config.show_matches)

    FE = feature_extractor.FeatureExtractor(config)
    main(config.train_filename, config)
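# If the extractor is needed outside this script, the same config can be built
# directly with argparse.Namespace; the attribute names mirror the flags above
# (a sketch, assuming FeatureExtractor only reads these attributes from config).
import argparse
import feature_extractor

config = argparse.Namespace(
    fps=20, train_filename="./data/train.mp4", verbose=True, max_frames=100,
    show_keypoints=False, show_matches=True, num_features=10, nr_matches=20,
    width=640, height=480)
FE = feature_extractor.FeatureExtractor(config)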
        for d_inst, v_inst in zip(X_dict, vf_dict)
    ]
    return X_dict, y_array


if __name__ == '__main__':
    print("Reading file ...")
    X_dict, y_array = read_data(train_filename, vf_train_filename)
    skf = StratifiedShuffleSplit(y_array, n_iter=2, test_size=0.5, random_state=57)
    print("Training file ...")
    for valid_train_is, valid_test_is in skf:
        X_valid_train_dict = [X_dict[i] for i in valid_train_is]
        y_valid_train = y_array[valid_train_is]
        X_valid_test_dict = [X_dict[i] for i in valid_test_is]
        y_valid_test = y_array[valid_test_is]

        fe = feature_extractor.FeatureExtractor()
        fe.fit(X_valid_train_dict, y_valid_train)
        X_valid_train_array = fe.transform(X_valid_train_dict)
        X_valid_test_array = fe.transform(X_valid_test_dict)

        clf = classifier.Classifier()
        clf_c = CalibratedClassifierCV(clf, cv=2, method='isotonic')
        clf_c.fit(X_valid_train_array, y_valid_train)
        y_valid_pred = clf_c.predict(X_valid_test_array)
        y_valid_proba = clf_c.predict_proba(X_valid_test_array)
        # print y_valid_proba
        print 'accuracy = ', accuracy_score(y_valid_pred, y_valid_test)
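# Note: StratifiedShuffleSplit(y_array, n_iter=..., ...) is the pre-0.18
# sklearn.cross_validation API. A sketch of the equivalent loop with the current
# sklearn.model_selection API, reusing X_dict and y_array from above:
import numpy as np
from sklearn.model_selection import StratifiedShuffleSplit

skf = StratifiedShuffleSplit(n_splits=2, test_size=0.5, random_state=57)
for valid_train_is, valid_test_is in skf.split(np.zeros(len(y_array)), y_array):
    pass  # same body as the loop above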
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.Session(config=config)

random.seed(1)
settings = {}

if __name__ == "__main__":
    settings = parse_settings(sys.argv[1])
    settings["ANCHORS"] = np.reshape(settings["ANCHORS"], [settings["DETECTORS"], 2])

    run_meta = tf.RunMetadata()
    with tf.Session(graph=tf.Graph()) as sess:
        K.set_session(sess)
        K.set_learning_phase(0)

        fe = feature_extractor.FeatureExtractor(settings)
        nets = {
            "yolo_v1": (fe.yolo_convolutional_net, 0),
            "shot_yolo_A": (fe.shot_yolo_convolutional_net_A, 0),
            "shot_yolo_B": (fe.shot_yolo_convolutional_net_B, 0),
            "shot_yolo_C": (fe.shot_yolo_convolutional_net_C, 0),
            "yolo_tiny": (fe.tiny_yolo_convolutional_net, 0),
            "inceptionv3": (fe.inceptionv3_convolutional_net, 2),
            "mobilenetv2": (fe.mobilenetv2_convolutional_net, 0),
            "xception": (fe.xception_convolutional_net, 0)
        }
        net = nets[settings["NET_ARCH"]][0](1)
        out_dim_factor = nets[settings["NET_ARCH"]][1]
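# A minimal settings dict consistent with the keys read above; the values are
# placeholders, and parse_settings presumably supplies many more fields.
settings = {
    "NET_ARCH": "yolo_tiny",
    "DETECTORS": 5,
    "ANCHORS": [1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0, 5.0, 5.0],
}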
def __init__(self):
    self.extractor = feature_extractor.FeatureExtractor()
def __init__(self, classifier):
    self.su = string_util.StringUtil()
    self.featureExtractor = feature_extractor.FeatureExtractor()
    self.classifier = classifier
def main(args):
    args.dataset.data_dir = hydra_utils.to_absolute_path(args.dataset.data_dir)
    args.dataset.save_dir = hydra_utils.to_absolute_path(args.dataset.save_dir)
    args.model.load_path = hydra_utils.to_absolute_path(args.model.load_path)
    print(args.pretty())

    # Set seed so same samples are chosen
    np.random.seed(1992)

    # If data exists, save time and skip this
    if os.path.exists(os.path.join(args.dataset.save_dir, 'alldata.pth')):
        print('Found {}'.format(
            os.path.join(args.dataset.save_dir, 'alldata.pth')))
        sys.exit(0)

    extractor = feature_extractor.FeatureExtractor(args)

    # Frame level attributes
    attributes = ['absence', 'cover']

    with open(os.path.join(args.dataset.data_dir, 'list.txt'), 'r') as f:
        vids = f.read().splitlines()

    # Too many videos? Subsample.
    if len(vids) > args.dataset.max_videos:
        vids = vids[::len(vids) // args.dataset.max_videos]

    allimagelist = []
    allboxes = []
    allann = []
    allframeids = []
    vid_to_class = {}
    for vid in tqdm(vids, desc='Reading GOT Annotations'):
        # Load video metadata
        metafile = os.path.join(args.dataset.data_dir, vid, 'meta_info.ini')
        with open(metafile, 'r') as f:
            data = f.read().splitlines()[1:]
        data = [
            d.split(': ')[1].replace(' ', '_') for d in data
            if 'object_class' in d
        ]
        vid_to_class[vid] = data[0]

        # Read bounding boxes
        boxfile = os.path.join(args.dataset.data_dir, vid, 'groundtruth.txt')
        with open(boxfile, 'r') as f:
            boxtext = f.read()
        boxes = boxtext.splitlines()[::args.dataset.stride]
        allboxes.append(boxes)

        num_boxes = len(boxtext.splitlines())
        fname_fmt = os.path.join(args.dataset.data_dir, vid, '{:08d}.jpg')
        imagelist = [fname_fmt.format(i + 1) for i in range(num_boxes)]
        imagelist = imagelist[::args.dataset.stride]
        allimagelist.append(imagelist)

        # Load absence and occlusion annotations
        thisann = []
        for attr in attributes:
            ann_file = os.path.join(args.dataset.data_dir, vid, attr + '.label')
            if not os.path.exists(ann_file):
                ann = [-1] * num_boxes
            else:
                with open(ann_file, 'r') as f:
                    ann = list(map(int, f.read().splitlines()))
            thisann.append(np.array(ann[::args.dataset.stride]))
        allann.append(thisann)
        allframeids.append(np.arange(num_boxes)[::args.dataset.stride])

    print('Found {} images'.format(
        sum([len(imlist) for imlist in allimagelist])))

    # Create temp file list to extract features
    with tempfile.NamedTemporaryFile(delete=False) as tmpfile:
        tmpfilelist = tmpfile.name
    with tempfile.NamedTemporaryFile(delete=False) as tmpfile:
        tmpboxlist = tmpfile.name
    with open(tmpfilelist, 'w') as f:
        for flist in allimagelist:
            for fname in flist:
                f.write(fname + '\n')
    with open(tmpboxlist, 'w') as f:
        for boxes in allboxes:
            for box in boxes:
                f.write(box + '\n')

    # Extract Features
    if not os.path.exists(os.path.join(args.dataset.save_dir, vids[0] + '.pth')):
        args.dataset.filelist = tmpfilelist
        args.dataset.boxlist = tmpboxlist
        extractor.create_dataloader(args)
        os.makedirs(args.dataset.save_dir, exist_ok=True)
        feature_dict = extractor.extract_features()
        feat_size = len(feature_dict[allimagelist[0][0]])

        # Save features for each video
        for ci, vid in enumerate(tqdm(vids, desc='Saving features')):
            num_feat = len(allimagelist[ci])
            vidfeat = np.zeros((num_feat, feat_size))
            for fi, fname in enumerate(allimagelist[ci]):
                vidfeat[fi, :] = feature_dict[fname]
            boxes = allboxes[ci]
            boxes = [list(map(float, box.split(','))) for box in boxes]
            boxes = np.array(boxes)
            out = {
                'feat': vidfeat,
                'boxes': boxes,
                'ids': allframeids[ci],
                'class': vid_to_class[vid]
            }
            attribute_ann = {
                attributes[i]: allann[ci][i]
                for i in range(len(attributes))
            }
            out.update(attribute_ann)
            savename = os.path.join(args.dataset.save_dir, vid + '.pth')
            torch.save(out, savename)

    # Merge features for good videos (heuristic - see paper)
    features = []
    instances = []
    allcovers = []
    allclasses = []
    count = 0
    for ci, vid in enumerate(tqdm(vids, desc='Accumulating features')):
        savename = os.path.join(args.dataset.save_dir, vid + '.pth')
        data = torch.load(savename)
        feat = data['feat']
        cover = data['cover']
        uniq_covers = np.unique(cover)
        if uniq_covers.max() - uniq_covers.min() < 4:
            continue
        chosen_inds = []
        for c in uniq_covers:
            chosen_inds.append(np.random.choice(np.where(cover == c)[0]))
        allcovers.extend(list(uniq_covers))
        feat = feat[np.array(chosen_inds)]
        features.append(feat)
        instances.extend([count] * len(chosen_inds))
        allclasses.extend([data['class']] * len(chosen_inds))
        count += 1

    features = np.vstack(features)
    torch.save(
        {
            'feat': features,
            'instance': instances,
            'cover': allcovers,
            'classes': allclasses
        }, os.path.join(args.dataset.save_dir, 'alldata.pth'))
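# Reading the merged features back (a sketch; 'save_dir' stands in for
# args.dataset.save_dir, and the keys match the dict saved above):
import os
import torch

save_dir = '/path/to/save_dir'
data = torch.load(os.path.join(save_dir, 'alldata.pth'))
print(data['feat'].shape, len(data['instance']), len(data['cover']), len(data['classes']))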