def main():
    """Build the feature matrices, fit the classifier, and write the submission.

    Relies on ``my_features``, ``my_classifier_predictions`` and
    ``utils.generate_submission`` from the surrounding project.
    """
    features = my_features()
    train_x, train_y, test_x = features
    predictions = my_classifier_predictions(train_x, train_y, test_x)
    utils.generate_submission("../deliverables/test_features.txt", predictions)
def main():
    """Train on the engineered features and emit predictions for the test set.

    The predictions returned by ``my_classifier_predictions`` are handed
    directly to ``utils.generate_submission``.
    """
    X_train, Y_train, X_test = my_features()
    utils.generate_submission(
        "../deliverables/test_features.txt",
        my_classifier_predictions(X_train, Y_train, X_test))
def main():
    """Build test features, load the svmlight matrices, and write predictions.

    ``my_features`` is called only for its side effects (its return value is
    ignored); presumably it writes the test feature file that is loaded
    below -- confirm against its definition.
    """
    events = pd.read_csv('../data/test/events.csv')
    event_feature_map = pd.read_csv('../data/test/event_feature_map.csv')
    my_features(events, event_feature_map)

    train_x, train_y = utils.get_data_from_svmlight(
        "../deliverables/features_svmlight.train")
    # The labels stored alongside the test matrix are not used.
    test_x, _ = utils.get_data_from_svmlight(
        "../deliverables/test_features.train")

    predictions = my_classifier_predictions(train_x, train_y, test_x)
    utils.generate_submission("../deliverables/test_features.txt", predictions)
def my_classifier_predictions(X_train, Y_train, X_test):
    """Fit a model on the training data and return integer labels for X_test.

    Side effect: column 1 of ``predict_proba(X_test)`` is written out through
    ``utils.generate_submission``.

    :param X_train: training feature matrix, passed to ``train_model``
    :param Y_train: training labels, passed to ``train_model``
    :param X_test: feature matrix to predict on
    :return: ``model.predict(X_test)`` cast to ``int``
    """
    model = train_model(X_train, Y_train)
    # Probabilities are computed once; the earlier version evaluated
    # predict_proba three times and threw two of the results away.
    test_proba = model.predict_proba(X_test)
    utils.generate_submission("../deliverables/test_features.txt",
                              test_proba[:, 1])
    return model.predict(X_test).astype(int)
def main():
    """Generate features, predict labels for the test set, write the submission."""
    train_features, train_labels, test_features = my_features()
    predicted = my_classifier_predictions(
        train_features, train_labels, test_features)
    utils.generate_submission("../deliverables/test_features.txt", predicted)
def main():
    """Train, evaluate, and write both the label and probability submissions.

    ``my_classifier_predictions`` used to be invoked twice with the first
    result discarded; it is now called once.
    """
    X_train, Y_train, X_test = my_features()
    Y_pred = my_classifier_predictions(X_train, Y_train, X_test)
    y_pred_proba = my_classifier_predictions_proba(X_train, Y_train, X_test)
    evaluate_model(X_train, Y_train)
    utils.generate_submission("../deliverables/test_features.txt", Y_pred)
    generate_submission_proba("../deliverables/test_features.txt",
                              y_pred_proba)
def main():
    """Run the trained network over the test set and write a submission.

    Settings are read from the JSON file given with ``-s``. Keys read here:
    ``out_path``, ``cuda``, ``data_csv``, ``data``, ``width``, ``height``,
    ``batch_size`` and ``model`` (path to the saved state dict).
    """
    parser = argparse.ArgumentParser()
    # Required: without it the script would fail later on open(None).
    parser.add_argument('-s', required=True, help='Settings file')
    args = parser.parse_args()
    with open(args.s) as config_file:
        config = json.load(config_file)

    path_to_out = config.get('out_path')
    is_available_cuda = config.get('cuda', False)

    # NOTE(review): albumentations.Resize is declared as (height, width) but
    # width is passed first here. Left unchanged because the training
    # pipeline may rely on the same order -- confirm before swapping.
    test_ds = CancerDataset(
        csv_file=config.get('data_csv'),
        root_dir=config.get('data'),
        transform_image=albumentations.Compose([
            albumentations.Resize(int(config.get('width')),
                                  int(config.get('height'))),
            albumentations.Normalize(),
            AT.ToTensor()
        ]))
    loader_test = DataLoader(test_ds, batch_size=config.get('batch_size'),
                             num_workers=1)
    submission_names = test_ds.get_train_img_names()

    model = fm.get_dense_net_121(pretrained=False)
    # Remap tensors to the CPU when CUDA is disabled so a checkpoint saved on
    # a GPU machine can still be loaded.
    map_location = None if is_available_cuda else 'cpu'
    model.load_state_dict(
        torch.load(config.get('model'), map_location=map_location))
    model.eval()
    if is_available_cuda:
        model.cuda()

    predicted_labels = []
    with torch.no_grad():
        for data in tqdm(loader_test):
            images = data[0].cuda() if is_available_cuda else data[0]
            y_predicted = torch.sigmoid(model(images))
            # Keep the first output of every sample in the batch.
            predicted_labels.extend(
                row[0] for row in y_predicted.cpu().numpy())

    predicted_labels = numpy.array(predicted_labels)
    utils.generate_submission(submission_names, predicted_labels, path_to_out)
def main():
    """Write test-set predictions, then report the AUC on the validation file.

    The validation AUC used to be computed and silently dropped; it is now
    printed so the second training run has a visible result.
    """
    X_train, Y_train, X_test = my_features()
    Y_pred = my_classifier_predictions(X_train, Y_train, X_test)
    utils.generate_submission("../deliverables/test_features.txt", Y_pred)
    # The above function will generate a csv file of (patient_id, predicted
    # label), saved as "my_predictions.csv" in the deliverables folder.

    X_traintest, Y_traintest = utils.get_data_from_svmlight(
        "../data/features_svmlight.validate")
    Y_trainpred = my_classifier_predictions(X_train, Y_train, X_traintest)
    auc = roc_auc_score(Y_traintest, Y_trainpred)
    print("Validation AUC: " + str(auc))
def my_classifier_predictions(X_train, Y_train, X_test):
    """Fit a linear SVM and return its predictions for ``X_test``.

    A ``CalibratedClassifierCV`` wrapped around the same SVM is fitted as
    well; its ``predict_proba`` output for ``X_test`` is passed to
    ``utils.generate_submission`` as a side effect.

    NOTE(review): the submission path is an absolute path on one machine.
    """
    base_svm = LinearSVC()
    base_svm.fit(X_train, Y_train)

    calibrated = CalibratedClassifierCV(base_svm)
    calibrated.fit(X_train, Y_train)

    utils.generate_submission(
        "C:/Users/Xiaojun/Desktop/omscs/CSE6250/hw1/deliverables/test_features.txt",
        calibrated.predict_proba(X_test))
    return base_svm.predict(X_test)
def main_v1():
    """
    Train a single model on both cities together.

    Loads the data with ``utils.load_data``, trains a ``Model`` and passes the
    predictions to ``utils.generate_submission`` together with
    ``SUBMISSION_PATH``.

    :return: None
    """
    df_train, df_train_label, df_test, df_test_pred = utils.load_data()
    # params = utils.load_param()
    # NOTE(review): `params` is not assigned inside this function; the loader
    # call above reads as commented out in the source as received, so
    # `params` is presumably a module-level name -- confirm.
    model = Model(df_train, df_train_label, df_test, params)
    y_pred = model.train()
    utils.generate_submission(df_test_pred, y_pred, SUBMISSION_PATH)
def main():
    """Write predictions for the training file and print cross-validation scores.

    The svmlight training file used to be read from disk twice; the matrices
    loaded at the top are now reused for the cross-validation runs.
    """
    X_train, Y_train = utils.get_data_from_svmlight(
        "../deliverables/features_svmlight.train")
    Y_pred = my_classifier_predictions(X_train, Y_train)
    utils.generate_submission("../deliverables/features.train", Y_pred)
    # The above function will generate a csv file of (patient_id, predicted
    # label), saved as "my_predictions.csv" in the deliverables folder.

    print("Classifier: Decision Tree Regressor__________")
    acc_k, auc_k = get_acc_auc_kfold(X_train, Y_train)
    print("Average Accuracy in KFold CV: " + str(acc_k))
    print("Average AUC in KFold CV: " + str(auc_k))
    acc_r, auc_r = get_acc_auc_randomisedCV(X_train, Y_train)
    print("Average Accuracy in Randomised CV: " + str(acc_r))
    print("Average AUC in Randomised CV: " + str(auc_r))
def main():
    """Predict test labels and save them as an Excel sheet and a submission.

    The Excel file has the columns ``patient_id`` (taken from the index of
    ``X_test``) and ``label``.

    NOTE(review): every path is an absolute path on one machine.
    """
    filepath = "C:/Users/yyan/Downloads/homework1/data/"
    train_dir = filepath + "train/"
    test_dir = filepath + "test/"
    X_train, Y_train, X_test = my_features(train_dir, test_dir)
    Y_pred = my_classifier_predictions(X_train, Y_train, X_test)

    labelled = pd.DataFrame(Y_pred, columns=['label'], index=X_test.index)
    my_prediction = labelled.reset_index()
    my_prediction.columns = ['patient_id', 'label']
    my_prediction.to_excel(
        'C:/Users/yyan/Downloads/homework1/deliverables/my_predictions.xlsx',
        index=False)

    utils.generate_submission(
        "C:/Users/yyan/Downloads/homework1/deliverables/test_features.txt",
        Y_pred)
def test(self):
    """Run the network over sliding windows of the test features.

    Each window is ``self.hist_size`` consecutive rows of
    ``self.dataloader.test_feature``. The prediction is passed through
    ``__make_bound`` and then to ``generate_submission``.
    """
    print('=====Generating test result=====')
    features = self.dataloader.test_feature
    window = self.hist_size
    n_windows = features.shape[0] - window + 1
    test_feature = np.array(
        [features[start:start + window] for start in range(n_windows)])

    feed_dict = {self.input: test_feature}
    if self.enable_dropout:
        # Feed a keep probability of 1 when the dropout placeholder is used.
        feed_dict[self.keep_prob] = 1

    result = self.sess.run(self.pred, feed_dict=feed_dict)
    self.__make_bound(result)
    generate_submission(result, '.', '.')
    print('=====Done=====')
def main():
    """Average the label column of every CSV in a directory into one submission.

    Column 0 of the first file supplies the submission names; column 1 of
    every file is summed and divided by the number of files.

    The earlier loop iterated ``range(len(list_dir) - 1)``, which re-read the
    first file (already counted) and never read the last one. Every file is
    now counted exactly once.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', help='paths out')
    parser.add_argument('-i', help='path in')
    args = parser.parse_args()

    list_dir = os.listdir(args.i)
    if not list_dir:
        parser.error('no files found in input directory: {}'.format(args.i))

    first_file = pandas.read_csv(os.path.join(args.i, list_dir[0]))
    submission_names = first_file.values[:, 0]
    # Accumulate in a separate float array rather than in a slice of the
    # frame's (possibly object-typed) values.
    labels = first_file.values[:, 1].astype(float)
    for file_name in list_dir[1:]:
        buf_csv = pandas.read_csv(os.path.join(args.i, file_name))
        labels += buf_csv.values[:, 1].astype(float)
    labels /= len(list_dir)

    utils.generate_submission(submission_names, labels, args.p)
def main_v2():
    """
    Train the cities separately and write one combined submission.

    A ``Model`` is trained for every key of ``utils.load_data_respectively()``;
    the frames returned by ``utils.generate_submission`` are concatenated in
    iteration order and saved to ``SUBMISSION_PATH``.

    :return: None
    """
    # Imported locally so this function does not depend on a module-level
    # alias that is not visible from here.
    import pandas as pd

    data = utils.load_data_respectively()
    prediction = []
    for each in data:
        # params = utils.load_param(each)
        # NOTE(review): `params` is not assigned inside this function; the
        # loader call above reads as commented out in the source as received,
        # so `params` is presumably a module-level name -- confirm.
        model = Model(data[each][0], data[each][1], data[each][2], params)
        y_pred = model.train(city=each)
        prediction.append(
            utils.generate_submission(data[each][3], y_pred, None))

    # DataFrame.append was removed in pandas 2.0, and the earlier version only
    # combined the first two frames; concat handles any number of cities.
    df_submission = pd.concat(prediction)
    df_submission.to_csv(SUBMISSION_PATH, index=False)
def main():
    """Print the k-fold AUC on the training data, then write the submission."""
    X_train, Y_train, X_test = my_features()

    # Only the AUC half of the (accuracy, AUC) pair is reported.
    _, auc_k = get_acc_auc_kfold(X_train, Y_train)
    print(auc_k)

    predictions = my_classifier_predictions(X_train, Y_train, X_test)
    utils.generate_submission("../deliverables/test_features.txt", predictions)
# max_delta_step = 0 USELESS # Decrease learning rate xgb7 = XGBRegressor(learning_rate =0.01, n_estimators=5000, max_depth=3, min_child_weight=4, gamma=4, subsample=1, colsample_bytree=0.95, reg_alpha = 0.05, reg_lambda = 6.5, base_score = y.mean(), objective= 'reg:linear', n_jobs=-1, scale_pos_weight=1, random_state=27) modelfit(xgb7, train, predictors) # Cross 0.5660 # numero alberi 349 cross = cross_val_score(xgb7, train, y, scoring = 'r2', n_jobs = -1, cv = 10) print(cross) print('Cross val R2: {}'.format(cross.mean())) utils.generate_submission(xgb7, train, y, test, test_ids, 'xgboost_PCA_ICA_onehot_featselect.csv')
iid=False, cv=5)
# NOTE(review): the line above is the tail of a call that starts outside this
# excerpt; it is kept verbatim.
gsearch10.fit(train[predictors],y)
# Bare expression statement: its value is discarded when run as a script.
gsearch10.grid_scores_, gsearch10.best_params_, gsearch10.best_score_
# Author's tuning note: reg_lambda = 16.

# Decrease the learning rate (0.01) with a large estimator budget (5000).
xgb4 = XGBRegressor(learning_rate =0.01, n_estimators=5000, max_depth=3,
                    min_child_weight=28, gamma=0, subsample=0.8,
                    colsample_bytree=0.8, reg_alpha = 0.026, reg_lambda = 16,
                    objective= 'reg:linear', n_jobs=-1, scale_pos_weight=1,
                    random_state=27)
modelfit(xgb4, train, predictors)
# Recorded result: cross 0.5623, number of trees 648.

# 10-fold cross-validated R2 for the tuned model.
cross = cross_val_score(xgb4, train, y, scoring = 'r2', n_jobs = -1, cv = 10)
print('Cross val R2: {}'.format(cross.mean()))
utils.generate_submission(xgb4, train, y, test, test_ids,
                          'xgboost_PCA_ICA_tuned.csv')
help='log storage dir for tensorboard')
# NOTE(review): the line above is the tail of an add_argument call that starts
# outside this excerpt; it is kept verbatim. The nesting below is
# reconstructed from a collapsed line -- confirm against the original file.
opt = parser.parse_args()

with tf.Session() as sess:
    # define resnet model
    sample = create_test_dataloader(data_root=opt.data_root, batch_size=opt.batch_size)
    with tf.variable_scope('model'):
        model = resnet_model.Model()
        p3d_out_norm = model(sample['image'], training=False)
    p3d_out = unnormalize_pose(p3d_out_norm)
    # Reshape to 51 values per row.
    p3d_out = tf.reshape(p3d_out, [-1, 51])

    # restore weights from the latest checkpoint in opt.log_dir
    saver = tf.train.Saver()
    saver.restore(sess, tf.train.latest_checkpoint(opt.log_dir))

    # Evaluate one batch per iteration and stack the outputs along axis 0.
    predictions = None
    with trange(math.ceil(meta_info.NUM_SAMPLES_TEST / opt.batch_size)) as t:
        for i in t:
            p3d_out_ = sess.run(p3d_out)
            if predictions is None:
                predictions = p3d_out_
            else:
                predictions = np.concatenate([predictions, p3d_out_], axis=0)

    generate_submission(predictions, 'submission.csv.gz')
    create_zip_code_files('code.zip')
def main():
    """Fit the classifier on the generated features and write the submission."""
    train_x, train_y, test_x = my_features()
    utils.generate_submission(
        "../deliverables/test_features.txt",
        my_classifier_predictions(train_x, train_y, test_x))
n_jobs=-1, scale_pos_weight=1, random_state=27)
# NOTE(review): the line above is the tail of a call that starts outside this
# excerpt; it is kept verbatim.
modelfit(xgb3, train, predictors)

# Candidate with a low learning rate (0.01) and a large estimator budget.
xgb4 = XGBRegressor(learning_rate =0.01, n_estimators=5000, max_depth=3,
                    min_child_weight=17, gamma=1.9, subsample=0.7,
                    colsample_bytree=0.95, reg_alpha = 0.016,
                    objective= 'reg:linear', n_jobs=-1, scale_pos_weight=1,
                    random_state=27)
modelfit(xgb4, train, predictors)

# 10-fold cross-validated R2 on the frame without the 'ID' and 'y' columns.
cross = cross_val_score(xgb4, train.drop(['ID', 'y'], axis = 1), y,
                        scoring = 'r2', n_jobs = -1, cv = 10)
# Bare expression statement: its value is discarded when run as a script.
cross.mean()
utils.generate_submission(xgb4, train.drop(['ID', 'y'], axis = 1), y,
                          test.drop('ID', axis = 1), test_ids,
                          'xgboost_original_ds_tuned.csv')
from model import get_model
import numpy as np
import os
import math

# Global attributes
BATCH_SIZE = 1  # needs to be 1 !
DATA_PATH = "/cluster/project/infk/hilliges/lectures/mp19/project2/"
SAVE_PATH = "./submitted_weights"

# Let TensorFlow grow GPU memory on demand and restrict it to device "0".
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.gpu_options.visible_device_list = "0"

with tf.Session(config=config) as sess:
    testGenerator = DataGenerator(DATA_PATH, batch_size=BATCH_SIZE, name="test")
    model = get_model(batch_size=BATCH_SIZE, train=False)

    # Load weights into the new model. latest_checkpoint returns None when
    # the directory holds no checkpoint, so fail with a clear message instead
    # of passing None on to load_weights.
    latest = tf.train.latest_checkpoint(SAVE_PATH)
    print(latest)
    if latest is None:
        raise RuntimeError("No checkpoint found in " + SAVE_PATH)
    model.load_weights(latest)

    # predict 3d pose
    model.compile(optimizer='adam', loss=tf.losses.mean_squared_error)
    p3d_out = model.predict_generator(testGenerator, verbose=1)

    # Undo the pose normalisation (no error metric is computed here), then
    # write the submission and the code archive.
    p3d_out = unnormalize_pose_numpy(p3d_out, 0, 1100*2)
    generate_submission(p3d_out, "submission.csv.gz")
    create_zip_code_files("code.zip")
# Random forest: sweep max_depth over 1..10 with 1500 trees per model.
depths = range(1, 11)
models = [RandomForestRegressor(n_estimators=1500, n_jobs=-1, max_depth=i) for i in depths]
values, scores, best_v, best_s = utils.parameter_search(df, y, models, depths, 'r2', xlabel = 'Max depth')

# Random forest: sweep the number of trees over 50..1000 in steps of 50.
n = range(50, 1001, 50)
models = [RandomForestRegressor(n_estimators=i, n_jobs=-1) for i in n]
values, scores, best_v, best_s = utils.parameter_search(df, y, models, n, 'r2', xlabel = '# alberi')

### K-NN ###
# 10-fold cross-validated R2 for a fixed K = 10.
knn = KNeighborsRegressor(n_neighbors=10, n_jobs=-1)
cross_knn = cross_val_score(knn, df, y, verbose = True, n_jobs=-1, scoring = 'r2', cv = 10)
# Bare expression statement: its value is discarded when run as a script.
cross_knn.mean()

# Recorded result without PCA: K = 10, R2 = 0.328
# Sweep K over 1..49 on df.
n = range(1, 50)
models = [KNeighborsRegressor(n_neighbors=i, n_jobs=-1) for i in n]
v_knn, s_knn, best_v_knn, best_s_knn = utils.parameter_search(df, y, models, n, 'r2', 'K')
print(best_v_knn, best_s_knn)

# Recorded result with PCA: K = 14, R2 = 0.325
# Same sweep on df_pca.
n = range(1, 50)
models = [KNeighborsRegressor(n_neighbors=i, n_jobs=-1) for i in n]
v_knn, s_knn, best_v_knn, best_s_knn = utils.parameter_search(df_pca, y, models, n, 'r2', 'K')
print(best_v_knn, best_s_knn)

### GENERATE SUBMISSION ###
# Submission model: 1000 trees, max depth 5.
rf = RandomForestRegressor(n_estimators=1000, verbose=True, n_jobs=-1, max_depth=5)
utils.generate_submission(rf, df, y, df_test, test_ids, 'rf_n1000_d5.csv')
# Booster parameters handed to generate_pred_output below.
xgb_param = {
    'objective': 'binary:logistic',
    "booster": "gbtree",
    "eval_metric": "logloss",
    "tree_method": 'auto',
    "silent": 1,
    "eta": 0.0123,
    "max_depth": 4,
    "min_child_weight": 5,
    "subsample": 0.95,
    "colsample_bytree": 0.7,
    "gamma": 0.1,
    "seed": 930114
}

# Returns the predictions together with the fitted model.
output, xgb_model = generate_pred_output(all_data, xgb_param, xgb_feature, n_trees=5500, day_test=31)
# NOTE(review): submission_output is built but not used in the visible code;
# generate_submission receives `output` and `instance_id` directly.
submission_output = pd.DataFrame({
    "prob": output,
    "instanceID": instance_id
})
utils.generate_submission(output, instance_id)
# xgb_model.dump_model("../pre_output/"+ "/mnt/trident/xiaolan/python/Contest/penguin_click/pred_output/" + time.strftime("%Y%m%d",time.gmtime())+"xgb_model.txt")
def test(self):
    """Write the predictor's ``predict_proba`` output for the test features.

    The probabilities are passed to ``generate_submission`` with ``'.'`` for
    both remaining arguments.
    """
    probabilities = self.predictor.predict_proba(self.test_feature)
    generate_submission(probabilities, '.', '.')