def run_evaluation(input_dim, net_name, saved_model_name, skip=0):
    """Load a trained detector from disk and evaluate it on the test split.

    Args:
        input_dim: network input resolution (e.g. 300 or 512).
        net_name: architecture key understood by get_config / get_net.
        saved_model_name: path to the saved state_dict (.pth) file.
        skip: frame subsampling factor for datasets that support it.

    Returns:
        (mean_ap, mean_miss): mean average precision and mean average miss
        rate as computed by test_net.

    Raises:
        ValueError: if DATASET_NAME is not one of the supported datasets.
    """
    num_classes = len(CLASSES) + 1  # +1 for the background class
    cfg = get_config(net_name + str(input_dim))
    net_class = get_net(net_name)
    net = net_class(input_dim, 'test', num_classes, cfg)  # initialize SSD
    net.load_state_dict(torch.load(saved_model_name))
    net.eval()
    print('Finished loading model!')

    # Pick the evaluation split for the configured dataset.
    if DATASET_NAME == 'KAIST':
        dataset = GetDataset(args.voc_root,
                             BaseTransform(input_dim, dataset_mean),
                             AnnotationTransform(),
                             dataset_name='test20', skip=skip)
    elif DATASET_NAME == 'VOC0712':
        dataset = GetDataset(args.voc_root,
                             BaseTransform(input_dim, dataset_mean),
                             AnnotationTransform(), [('2007', 'test')])
    elif DATASET_NAME == 'Sensiac':
        dataset = GetDataset(args.voc_root,
                             BaseTransform(input_dim, dataset_mean),
                             AnnotationTransform(), dataset_name='day_test10')
    elif DATASET_NAME == 'Caltech':
        dataset = GetDataset(args.voc_root,
                             BaseTransform(input_dim, dataset_mean),
                             AnnotationTransform(),
                             dataset_name='test01', skip=skip)
    else:
        # BUG FIX: an unknown name previously fell through and crashed later
        # with a NameError on `dataset`; fail fast with a clear message.
        raise ValueError('Unsupported DATASET_NAME: %s' % DATASET_NAME)

    if args.cuda:
        net = net.cuda()
        cudnn.benchmark = True

    # Renamed locals so the builtin `map` is not shadowed.
    mean_ap, mean_miss = test_net(args.save_folder, net, args.cuda, dataset,
                                  BaseTransform(net.size, dataset_mean),
                                  args.top_k, input_dim,
                                  thresh=args.confidence_threshold)
    return mean_ap, mean_miss
def ReportPerfCV(model, feature_set, y, calibrated=False, n_folds=5, short=False):
    """Stratified K-fold cross-validation report.

    Logs per-fold accuracy and log loss, then overall CV scores. Returns the
    out-of-fold probability matrix together with the negated CV log loss.
    When `short` is True, only the first fold is run and its negated log
    loss is returned instead.
    """
    folds = StratifiedKFold(y, n_folds, shuffle=True)
    oof_proba = np.empty((len(y), len(np.unique(y))))
    X, Xtest = GetDataset(feature_set)

    if calibrated:
        logger.info("Enabling probability calibration...")
        model = CalibratedClassifierCV(model, 'sigmoid', cv=n_folds - 1)

    fold_num = 0
    for train_idx, valid_idx in folds:
        fold_num += 1
        logger.info("Running fold %d...", fold_num)
        model.fit(X[train_idx], y[train_idx])
        logger.info("Fold %i Accuracy: %.4f", fold_num,
                    model.score(X[valid_idx], y[valid_idx]))
        oof_proba[valid_idx, :] = model.predict_proba(X[valid_idx])
        logger.info("Fold %i Log Loss: %.4f", fold_num,
                    log_loss(y[valid_idx], oof_proba[valid_idx]))
        if short:
            break

    if short:
        # Score only the single fold that was run.
        return -log_loss(y[valid_idx], oof_proba[valid_idx])

    # Presumably labels are strings whose last character is the class id
    # (e.g. 'Class_3') — TODO confirm against the dataset loader.
    predicted = np.argmax(oof_proba, axis=1) + 1
    true_ids = np.array([int(label[-1]) for label in y])
    logger.info("CV Accuracy: %.5f", accuracy_score(true_ids, predicted))
    logger.info("CV Log Loss: %.4f", log_loss(y, oof_proba))
    return oof_proba, -log_loss(y, oof_proba)
def GetPrediction(model, feature_set, y, train=None, valid=None, preds="proba", verbose=1):
    """Fit `model` on `feature_set` using saved best parameters and predict.

    Args:
        model: sklearn-style estimator.
        feature_set: feature-set name understood by GetDataset.
        y: label vector; subset by `train` when given.
        train, valid: optional index arrays forwarded to GetDataset.
        preds: "proba" for predict_proba, "class" for predict.
        verbose: verbosity forwarded to the model when it supports it.

    Returns:
        Prediction array for the test portion, or False when saved
        parameters are missing or `preds` is invalid.
    """
    model_name = model.__class__.__name__
    params = INITIAL_PARAMS.get(model_name, {})
    model.set_params(**params)
    y = y if train is None else y[train]
    model_feat = stringify(model, feature_set)
    try:
        with open('../Params/Best/%s_saved_params.json' % model_feat) as f:
            saved_params = json.load(f).get(model_feat, {})
    except IOError:
        # Use the module logger for consistency with the rest of this
        # function (was logging.warning).
        logger.warning("Could not find best parameter for %s with feature "
                       "set %s", model_name, feature_set)
        # BUG FIX: removed a dead `saved_params = {}` assignment that was
        # immediately followed by this return.
        return False
    for key in saved_params.keys():
        logger.info("%s: %s", key, saved_params[key])
        ### Fixing Unicode String issues (Python 2 json yields unicode)
        if isinstance(saved_params[key], unicode):
            saved_params[key] = str(saved_params[key])
    if 'verbose' in model.get_params():
        model.set_params(verbose=verbose)
    X, Xtest = GetDataset(feature_set, train, valid,
                          ensemble_list=CONFIG['ensemble_list'])
    model.set_params(**saved_params)
    logger.info("Fitting %s on %s feature", model_name, feature_set)
    model.fit(X, y)
    logger.info("Returning prediction")
    if preds == "proba":
        yhat = model.predict_proba(Xtest)
    elif preds == "class":
        yhat = model.predict(Xtest)
    else:
        logger.warning("preds must be either proba or class")
        return False
    return yhat
        # NOTE(review): this chunk begins mid-function — the enclosing def is
        # not visible here; indentation below is reconstructed.
            profitability += 1
        trading_route.append(trading_quantity)
    # fraction of profitable trades alongside the executed route
    return np.array(trading_route), profitability/trades


if __name__ == '__main__':
    # freeze the seed to get static results
    torch.manual_seed(0)
    from src.hyperparameters import *
    plt_shape = []
    prof_values = []
    # linear and MLP have the different inputs from the LSTM, load their inputs first, then do the lstm later
    if 'linear' in blocks or 'MLP' in blocks:
        train_features, train_targets, test_features, test_targets = gd.get_dataset_by_category(
            "commodities", 0.9, aggregate_days=5,
            target_lookahead=target_lookahead, assets_to_view=features,
            normalize_data=normalize_data)
        # depending on the length of the lookahead, some of the inputs might not be populated,
        # trim them from the dataset
        train_features = [elem for elem in train_features if elem.shape[0] > 0]
        train_targets = [elem for elem in train_targets if elem.shape[0] > 0]
        train_features = np.concatenate(train_features).astype(np.float32)
        train_targets = np.concatenate(train_targets).astype(np.float32)
        test_features = [elem for elem in test_features if elem.shape[0] > 0]
        test_targets = [elem for elem in test_targets if elem.shape[0] > 0]
        if 'linear' in blocks:
            print('----- Linear -----')
            # create the handler and model
            linear = LinearHandler(epochs, loss_function, None, 0.01,
                                   batch_size, l1enable=regularization)
            # input width = per-sample feature count; single output unit
            linear.create_model(train_features.shape[1], 1, dropout)
def FindParams(model, feature_set, y, CONFIG, subsample=None, grid_search=True):
    """
    Return parameter set for the model, either found through randomized
    search cross validation, or loaded from a previously saved file.

    Args:
        model: sklearn-style estimator.
        feature_set: feature-set name understood by GetDataset.
        y: label vector; subset by `subsample` when given.
        CONFIG: dict with 'nCores', 'nGrids', 'SEED', 'ensemble_list'.
        subsample: optional index array restricting the training labels.
        grid_search: when True, run the search if no saved params exist.

    Returns:
        dict of the best (or saved) hyper-parameters for this model/feature.
    """
    ### Setting configurations
    model_name = model.__class__.__name__
    if model_name in ['SGDClassifier', 'KNeighborsClassifier',
                      'AdaBoostClassifier']:
        scorer = Accuracy  # SGD can not predict probability
    else:
        scorer = logLoss
    if model_name in ['ExtraTreesClassifier', 'BoostedTreesClassifier',
                      'MultilayerPerceptronClassifier', 'DBN',
                      'CalibratedClassifierCV']:
        nCores = 1  # these estimators must not be parallelised by the search
    else:
        nCores = CONFIG['nCores']

    ### Setting parameters
    params = INITIAL_PARAMS.get(model_name, {})
    model.set_params(**params)
    y = y if subsample is None else y[subsample]
    model_feat = stringify(model, feature_set)
    logger.info("Start RandomizedSearchCV parameter for %s", model_feat)
    logger.info("nCores: %d, nGrid: %d, job_id: %s" %
                (nCores, CONFIG['nGrids'], job_id))
    logger.info("Scorer: %s", scorer.__class__.__name__)
    try:
        with open('../Params/RandomizedSearchCV/%s_saved_params.json' %
                  model_feat) as f:
            saved_params = json.load(f)
    except IOError:
        saved_params = {}

    clf = None  # bound only when a fresh search actually runs
    if grid_search and stringify(model, feature_set) not in saved_params:
        ### Fit Model
        X, _ = GetDataset(feature_set, ensemble_list=CONFIG['ensemble_list'])
        clf = RandomizedSearchCV(model, PARAM_GRID[model_name],
                                 scoring=scorer, cv=5,
                                 n_iter=CONFIG['nGrids'], n_jobs=nCores,
                                 random_state=CONFIG['SEED'], verbose=2)
        clf.fit(X, y)

        ### Reporting
        logger.info("Found params (%s > %.4f): %s" %
                    (stringify(model, feature_set), clf.best_score_,
                     clf.best_params_))
        for fit_model in clf.grid_scores_:
            logger.info("MeanCV: %.4f", fit_model[1])
            for para, para_value in fit_model[0].iteritems():
                if para != 'bounds':
                    logger.info("%20s: %10s", para, para_value)
                else:
                    logger.info("Bound with length %d: ", len(para_value))

        ### Save Parameters
        params.update(clf.best_params_)
        saved_params[stringify(model, feature_set)] = params
        with open('../Params/RandomizedSearchCV/%s_%s_saved_params.json' %
                  (model_feat, job_id), 'w') as f:
            json.dump(saved_params, f, indent=4, separators=(',', ': '),
                      ensure_ascii=True, sort_keys=True)
    else:
        params.update(saved_params.get(stringify(model, feature_set), {}))

    if grid_search:
        logger.info("Using params %s: %s" % (model_feat, params))
        # BUG FIX: clf.grid_scores_ was previously dumped unconditionally,
        # raising NameError whenever the parameters came from the saved file
        # and no search object existed. Also close the output file handle
        # deterministically instead of leaking it.
        if clf is not None:
            with open('../Employment/MPC15/' + model_feat + job_id + '.pkl',
                      'w') as pf:
                pickle.dump(clf.grid_scores_, pf)
    return params
    # NOTE(review): this chunk begins mid-dict — the opening of
    # `param_distributions` is not visible here.
    'step_size': LogUniform(.0001, 1.),
    'max_depth': UniformInt(2, 50),
    'row_subsample': Uniform(.3, 1.),
    'column_subsample': Uniform(.3, 1.),
    'min_child_weight': LogUniform(.01, 100),
    'min_loss_reduction': Uniform(0.0001, 10)
}
from gl import BoostedTreesClassifier
# Silence graphlab's own logger so only CRITICAL messages get through.
logger2 = logging.getLogger('graphlab')
logger2.setLevel(logging.CRITICAL)
# Base-model prediction sets stacked into the ensemble feature matrix.
CONFIG['ensemble_list'] = [
    'btc', 'btc2', 'btc3', 'btc4', 'svc', 'svc2', 'svc3', 'nn', 'nn2',
    'nic', 'mpc', 'knc', 'etc', 'cccv', 'log', 'crfcbag', 'cetcbag', 'keras'
]
X, Xtest = GetDataset('ensemble', ensemble_list=CONFIG['ensemble_list'])
print "lkjr"
# Gaussian-process-guided hyper-parameter search over the distributions above.
clf = GaussianProcessCV(estimator=BoostedTreesClassifier(verbose=False),
                        param_distributions=param_distributions,
                        kernel=DoubleExponential, scoring=LogLoss,
                        mu_prior=-1., sigma_prior=.30, sig=.01, cv=5,
                        max_iter=55, random_state=1, time_budget=24 * 3600)
clf.fit(X, y)
# NOTE(review): clf is rebound here — the GaussianProcessCV results above are
# discarded; the call is truncated at the end of this chunk.
clf = RandomSearchCV(estimator=BoostedTreesClassifier(verbose=False),
                     param_distributions=param_distributions,
                    # NOTE(review): chunk begins mid-argparse call; the
                    # parser.add_argument( opener is not visible here.
                    default='weights/PDN512_Caltech_visible.pth', type=str,
                    help='Trained state_dict file path to open')
args = parser.parse_args()
# from models import build_ssd as build_ssd_v1  # uncomment for older pool6 model
num_classes = len(labels) + 1  # +1 for the background class
input_dim = int(args.input_dim)
cfg = get_config(args.net + args.input_dim)
net_class = get_net(args.net)
net = net_class(input_dim, 'test', num_classes, cfg)  # initialize SSD
net.load_weights(args.trained_model)
testset = GetDataset(DatasetRoot, None, AnnotationTransform(),
                     dataset_name='test01')
# Visual spot-check loop over test samples (starts at index 1000);
# continues past the end of this chunk.
for index in range(1000, testset.num_samples):
    # if i%10 != 0.0:
    #     continue
    _t = Timer()
    _t.tic()
    image = testset.pull_image(index)
    img_height, img_width = image.shape[:2]
    _, anno = testset.pull_anno(index, img_width, img_height)
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # View the sampled input image before transform
    x = cv2.resize(image, (input_dim, input_dim)).astype(np.float32)
    # subtract the BGR channel means — presumably ImageNet/VGG means; confirm
    x -= (104.0, 117.0, 123.0)
def validation(net, skip):
    """Run detection over the configured test split and score the results.

    Args:
        net: detector already on the right device; switched to eval mode here.
        skip: frame subsampling factor for datasets that support it.

    Returns:
        (mean_ap, mean_miss): mean average precision and mean average miss
        rate from evaluate_detections.

    Raises:
        ValueError: if DATASET_NAME is not one of the supported datasets.
    """
    net.eval()
    ### Load testing data
    if DATASET_NAME == 'KAIST':
        dataset = GetDataset(args.voc_root, BaseTransform(image_size, means),
                             AnnotationTransform(), dataset_name='test20',
                             skip=skip)
    elif DATASET_NAME == 'VOC0712':
        dataset = GetDataset(args.voc_root, BaseTransform(image_size, means),
                             AnnotationTransform(), [('2007', 'test')])
    elif DATASET_NAME == 'Sensiac':
        dataset = GetDataset(args.voc_root, BaseTransform(image_size, means),
                             AnnotationTransform(), dataset_name='day_test10')
    elif DATASET_NAME == 'Caltech':
        dataset = GetDataset(args.voc_root, BaseTransform(image_size, means),
                             AnnotationTransform(), dataset_name='test01',
                             skip=skip)
    else:
        # BUG FIX: unknown names previously crashed later with NameError.
        raise ValueError('Unsupported DATASET_NAME: %s' % DATASET_NAME)

    num_images = len(dataset)
    # all_boxes[class][image] -> (n, 5) array of [x1, y1, x2, y2, score]
    all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
    _t = {'im_detect': Timer(), 'misc': Timer()}
    output_dir = get_output_dir(
        DATASET_NAME + "_" + args.net + args.input_dim + "_120000",
        DATASET_NAME)
    det_file = os.path.join(output_dir, 'detections.pkl')
    for i in range(num_images):
        im, gt, h, w = dataset.pull_item(i)
        x = Variable(im.unsqueeze(0))
        if args.cuda:
            x = x.cuda()
        _t['im_detect'].tic()
        detections = net(x).data
        detect_time = _t['im_detect'].toc(average=False)
        # The old code kept a separate `index` counter that always equalled
        # i + 1; use i + 1 directly.
        print("%s/%s time:%s" % (i + 1, num_images, detect_time))
        # skip j = 0, because it's the background class
        for j in range(1, detections.size(1)):
            dets = detections[0, j, :]
            # keep rows whose confidence (column 0) is positive
            mask = dets[:, 0].gt(0.).expand(5, dets.size(0)).t()
            dets = torch.masked_select(dets, mask).view(-1, 5)
            if dets.dim() == 0:
                continue
            boxes = dets[:, 1:]
            # boxes come out normalised — scale back to image pixels
            boxes[:, 0] *= w
            boxes[:, 2] *= w
            boxes[:, 1] *= h
            boxes[:, 3] *= h
            scores = dets[:, 0].cpu().numpy()
            cls_dets = np.hstack(
                (boxes.cpu().numpy(),
                 scores[:, np.newaxis])).astype(np.float32, copy=False)
            all_boxes[j][i] = cls_dets
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)
    print('Evaluating detections')
    # Renamed locals so the builtin `map` is not shadowed.
    mean_ap, mean_miss = evaluate_detections(all_boxes, output_dir, dataset)
    return mean_ap, mean_miss
def train():
    """Main SSD training loop.

    Reads its model, optimizer, criterion, logger and CLI options from
    module-level globals (parallel_net, net, optimizer, criterion, logger,
    args, start_iter, batch_size, image_size, means, DATASET_NAME).
    Saves periodic checkpoints and optionally runs validation.
    """
    parallel_net.train()
    # loss counters
    # loc_loss = 0  # epoch
    # conf_loss = 0
    # epoch = 0:
    print('Loading Dataset...')
    dataset = GetDataset(args.voc_root,
                         SSDAugmentation(image_size, means,
                                         type=args.img_type),
                         AnnotationTransform(), type=args.img_type)
    # number of iterations per pass over the dataset
    epoch_size = len(dataset) // args.batch_size
    print('Training SSD on', dataset.name)
    step_index = 0
    batch_iterator = None
    data_loader = data.DataLoader(dataset, batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)
    for iteration in range(start_iter, args.iterations):
        # Re-create the iterator at the start and at each epoch boundary.
        if (not batch_iterator) or (iteration % epoch_size == 0):
            # create batch iterator
            batch_iterator = iter(data_loader)
        # Step the LR schedule at the configured milestones.
        if iteration in args.step_values:
            step_index += 1
            adjust_learning_rate(optimizer, args.gamma, step_index)
            # loc_loss = 0
            # conf_loss = 0
            # epoch += 1
        # load train data
        images, targets = next(batch_iterator)
        if args.cuda:
            images = Variable(images.cuda())
            # volatile=True: pre-0.4 PyTorch way to disable autograd on targets
            targets = [
                Variable(anno.cuda(), volatile=True) for anno in targets
            ]
        else:
            images = Variable(images)
            targets = [Variable(anno, volatile=True) for anno in targets]
        # forward
        t0 = time.time()
        out = parallel_net(images)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        t1 = time.time()
        # loc_loss += loss_l.data[0]
        # conf_loss += loss_c.data[0]
        if iteration % args.log_step == 0:
            print('Timer: %.4f sec.' % (t1 - t0))
            print('iter ' + repr(iteration) + ' || Loss: %.4f ||' %
                  (loss.data[0]), end=' ')
            # scalar curves for tensorboard
            logger.scalar_summary("bbox_regression_loss", loss_l.data[0],
                                  iteration)
            logger.scalar_summary("classification_loss", loss_c.data[0],
                                  iteration)
            logger.scalar_summary("total_loss", loss.data[0], iteration)
        if args.send_images_to_tensorboard and iteration % args.save_images_step == 0:
            logger.image_summary("agumentation images",
                                 images.data.cpu().numpy(), iteration)
        # periodic checkpoint (saves the raw net, not the DataParallel wrapper)
        if (iteration + 1) % args.model_save_step == 0:
            print('Saving state, iter:', iteration)
            save_path = 'weights/' + args.net + args.input_dim + '_' + \
                DATASET_NAME + "_" + args.img_type + "_" + repr(
                    iteration) + '.pth'
            torch.save(net.state_dict(), save_path)
        if args.validation and (iteration + 1) % args.validation_step == 0:
            ####evaluation##########
            print("runing evaluation!!!!")
            # map, mam = evaluation.run_evaluation(input_dim=image_size,net_name= args.net, saved_model_name=save_path,skip=300)
            net.set_phase("test")
            map, mam = validation(net, skip=args.validation_data_skip)
            net.set_phase("train")
            logger.scalar_summary("mAP", map, iteration)
            logger.scalar_summary("Average_Missing_Rate", mam, iteration)
    # final checkpoint after the loop finishes
    # NOTE(review): nesting reconstructed from collapsed source — presumably
    # this save runs once after training; confirm against the original file.
    torch.save(
        net.state_dict(),
        args.save_folder + args.net + args.input_dim + '_' + DATASET_NAME +
        "_" + args.img_type + '.pth')