示例#1
0
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.cross_validation import ShuffleSplit
from sklearn.metrics import make_scorer
from pandas import DataFrame, Series
from dream_2014_functions import read_data_sets, save_gct_data, submit_solution, write_features_sc3, ev_code_sc3


def spearm_cor_func(expected, pred):
    """Return the Spearman rank correlation coefficient of two samples.

    ``spearmanr`` yields a ``(coefficient, p-value)`` pair; only the
    coefficient is handed back, the p-value is discarded.
    """
    rho, _p_value = spearmanr(expected, pred)
    return rho

# Prefix for submission file names (going by the variable name; where it is
# consumed is outside this chunk).
submission_filename_prefix = 'sc3_emanuel_phase2_'

# Import data: read_data_sets() comes from dream_2014_functions and returns
# six objects, unpacked here in order.
train_exp, train_cnv, train_ess, leader_exp, leader_cnv, prioritized_genes = read_data_sets()

# Only the expression tables are used as predictors from here on; the CNV
# tables are loaded but not referenced in the visible part of the script.
X_train_pre = train_exp
X_test_pre = leader_exp

# Variance filter fitted on the training table only (threshold 0.65).
# NOTE(review): VarianceThreshold is not imported in the visible lines -
# presumably sklearn.feature_selection.VarianceThreshold; confirm the import.
var_thres = VarianceThreshold(0.65).fit(X_train_pre)
# The same column mask is applied to both tables so they keep identical columns.
X_train_pre = X_train_pre.loc[:, var_thres.get_support()]
X_test_pre = X_test_pre.loc[:, var_thres.get_support()]

# Prepare features: column labels that survived the variance filter, plus an
# (initially empty) list that the code below is expected to fill.
features = X_train_pre.columns
important_features = []

for gene in prioritized_genes:
    # Assemble prediction variables
    X_train = X_train_pre
示例#2
0
    return 1 / ((np.median(matrix, axis=0) / matrix) ** hill_coef + 1)


def count_outliers(matrix):
    """Count the outliers in every column of *matrix* using Tukey's fences.

    A value is an outlier when it lies strictly below ``Q1 - 1.5 * IQR`` or
    strictly above ``Q3 + 1.5 * IQR``, where ``Q1``/``Q3`` are the column's
    25th/75th percentiles and ``IQR = Q3 - Q1``.

    Columns are addressed by position with ``.iloc``. The previous ``.ix``
    indexer was removed in pandas 1.0 and raised ``AttributeError`` there.

    Args:
        matrix: pandas ``DataFrame`` with numeric columns.

    Returns:
        A list of ints, one per column in column order, giving the number of
        outliers in that column. An empty list when there are no columns.
    """
    outliers_counts = []
    for position in range(matrix.shape[1]):
        # Select the column once instead of re-slicing it for every use.
        column = matrix.iloc[:, position]

        q1, q3 = np.percentile(column, [25, 75])
        iqr = q3 - q1
        lower_fence = q1 - 1.5 * iqr
        upper_fence = q3 + 1.5 * iqr

        # The two conditions are mutually exclusive, so counting the union
        # equals the sum of the two separate counts.
        is_outlier = (column < lower_fence) | (column > upper_fence)
        outliers_counts.append(int(is_outlier.sum()))

    return outliers_counts

# Import data-sets: read_data_sets() returns six objects, unpacked in order.
exp, cnv, ess, leader_exp, leader_cnv, prioritized_genes = read_data_sets()

# Split training data-set in two by row label.
# NOTE(review): training_cell_lines and leader_board_cell_lines are not
# defined in the visible lines - they must be set earlier in the file.
train_exp = exp.loc[training_cell_lines, ]
train_cnv = cnv.loc[training_cell_lines, ]
train_ess = ess.loc[training_cell_lines, ]

# Held-out rows used for prediction/evaluation.
pred_exp = exp.loc[leader_board_cell_lines, ]
pred_cnv = cnv.loc[leader_board_cell_lines, ]
# Transposed, unlike the tables above: the selected row labels become columns.
pred_ess = ess.loc[leader_board_cell_lines, ].T

# Configurations
# Empty result table: one row per prioritized gene, one column per column of
# the transposed pred_ess (i.e. per held-out row label selected above).
predictions = DataFrame(None, index=prioritized_genes, columns=pred_ess.axes[1])
# Scorer wrapping spearm_cor_func; greater_is_better=True marks higher as better.
spearman = make_scorer(spearm_cor_func, greater_is_better=True)
# Starts empty; presumably filled per gene further down - TODO confirm.
predictions_features = {}