示例#1
0
    # Make sure the per-dataset output directory exists. exist_ok=True makes
    # this a single idempotent call instead of a check-then-create sequence,
    # which could fail if the directory appeared between the two steps.
    filepath = '../../figs/out/%s/%s/%s/' % (scriptname, nowdate, dataset)
    os.makedirs(filepath, exist_ok=True)

    # Import dataset and target
    if dataset == 'MESA':
        # The first CSV row is the header; the column at position 1 is the index.
        X = pd.read_csv(
            '../../data/mesa/MESA_Clinical_data_full_COMBI-BIO_non-verbose.csv',
            sep=',',
            header=0,
            index_col=1)

        X = p2.filt_imp(X, 0.1)
        # NOTE(review): no target `y` is assigned in this branch, while the
        # simulated-data branch below loads one - confirm that later code
        # does not rely on `y` when dataset == 'MESA'.

    else:
        # Simulated data: features come from a CSV and the target from a .npy
        # file, both selected by the dataset name.
        X = pd.read_csv('../../data/simulated/mvnsim/mvnsim' + dataset +
                        '.csv',
                        sep=',',
                        header=0,
                        index_col=0)

        y = np.load('../../data/simulated/mvnsim/target' + dataset + '.npy')
    #print(y)
    #print(y.shape)
    #print(X.shape)
    '''
    p2.distribution_boxplot(X,
                         y,
# Script identifier; it is interpolated into the output directory path.
scriptname = 'mesa_kpca2'

# Datasets to test. The full candidate list is kept here for reference; only
# the active subset assigned below is iterated over.
#   'diabetes', 'sex', 'cac_binomial', 'cac_extremes', 'family_hx_diabetes',
#   'parent_cvd_65_hx', 'family_hx_cvd', 'bp_treatment', 'diabetes_treatment',
#   'lipids_treatment', 'mi_stroke_hx', 'plaque'
dataset_list = [
    'diabetes',
    'sex',
    'cac_binomial',
]

# Process each dataset in turn: prepare its output directory, load its CSV,
# run it through the p2f helpers and scale the resulting feature matrix.
for dataset in dataset_list:

    print('\n##### Now running dataset %s #####' % dataset)

    # Make sure the per-dataset output directory exists. exist_ok=True makes
    # this a single idempotent call instead of a check-then-create sequence.
    filepath = '../../figs/out/%s/%s/%s/' % (scriptname, nowdate, dataset)
    os.makedirs(filepath, exist_ok=True)

    # The CSV has no header row; its first column is used as the index.
    X = pd.read_csv(
        '../../data/mesa/MESA_CPMG_MBINV2_ManuallyBinnedData_BatchCorrected_LogTransformed_1stcol_%s.csv'
        % dataset,
        sep=',',
        header=None,
        index_col=0)
    X_imp = p2f.filt_imp(X, 0.1)

    # Rebuild a DataFrame from the filt_imp output before splitting it into
    # X and y with p2f.tsplit.
    X_imp_df = pd.DataFrame.from_records(X_imp)
    X, y = p2f.tsplit(X_imp_df)

    X_scaled = scale(X)
示例#3
0
    # Make sure the per-dataset output directory exists. exist_ok=True makes
    # this a single idempotent call instead of a check-then-create sequence,
    # which could fail if the directory appeared between the two steps.
    filepath = '../../figs/out/%s/%s/%s/' % (scriptname, nowdate, dataset)
    os.makedirs(filepath, exist_ok=True)

    # Import dataset and target
    if dataset == 'MESA':
        # The first CSV row is the header; the column at position 1 is the index.
        X = pd.read_csv(
            '../../data/mesa/MESA_Clinical_data_full_COMBI-BIO_non-verbose.csv',
            sep=',',
            header=0,
            index_col=1)

        X = filt_imp(X, 0.1)
        # NOTE(review): no target `y` is assigned in this branch, while the
        # simulated-data branch below loads one - confirm that later code
        # does not rely on `y` when dataset == 'MESA'.

    else:
        # Simulated data: features come from a CSV and the target from a .npy
        # file, both selected by the dataset name.
        X = pd.read_csv('../../data/simulated/mvnsim/mvnsim' + dataset +
                        '.csv',
                        sep=',',
                        header=0,
                        index_col=0)

        y = np.load('../../data/simulated/mvnsim/target' + dataset + '.npy')
    #print(y)
    #print(y.shape)
    #print(X.shape)

    distribution_boxplot(
        X,
示例#4
0
# Collect optimal tier1 gammas (the list starts out empty)
opt_t1_gammas = []

# Use the first input dataset to generate the toy datasets. From that first
# entry, element [1] selects the CSV file and element [0] is the name shown
# in the progress message below.
inp_df = pd.read_csv(
    '../../data/mesa/MESA_CPMG_MBINV2_ManuallyBinnedData_BatchCorrected_LogTransformed_1stcol_%s.csv'
    % inp_dataset_list[0][1],
    sep=',',
    header=None,
    index_col=0)

print(
    '\nUsing %s dataset to generate simulated datasets for the purpose of tuning algorithms and hyperparameters.'
    % inp_dataset_list[0][0])

# Run the raw table through p2f.filt_imp, split it into X and y, then build
# the toy datasets (and their target) from X.
X_imp = p2f.filt_imp(inp_df, 0.1)
X, y = p2f.tsplit(X_imp)
toy_dataset_list, toy_y = p2f.toybox_gen(X)

# Iterate over the (label, data) pairs of generated toy datasets.
for toy_label, toy_X in toy_dataset_list:

    print('\n##### Now running dataset %s through tier 1 #####' % toy_label)

    # Output directory for this toy dataset and its plotting sub-directory.
    filepath = '../../figs/out/%s/%s/%s/' % (scriptname, nowdate, toy_label)
    plotpath = '%splotting/' % filepath

    # plotpath is nested inside filepath, so one makedirs call creates both.
    # exist_ok=True also guarantees plotting/ is created when filepath is
    # already present, and keeps re-runs from failing on existing directories.
    os.makedirs(plotpath, exist_ok=True)
示例#5
0
import pandas as pd
import numpy as np
from numpy.random import multivariate_normal
import p2funcs as p2f

# Location of the MESA input table, relative to the working directory.
_mesa_data_csv = '../../data/mesa/MESA_CPMG_MBINV2_ManuallyBinnedData_BatchCorrected_LogTransformed_Data.csv'

# Load the comma-separated table, then pass it through p2f.filt_imp with 0.1
# as the second argument.
inp_df = pd.read_csv(_mesa_data_csv, sep=',')
imp_df = p2f.filt_imp(inp_df, 0.1)
'''
def toybox_gen(inp_df):
    
    
    #Define size of component using input data
    cols, rows = inp_df.shape
    comp_size = [cols, int(round(rows/2))]
    
    cov1 = np.array([[[0.3, 0.2], [0.2, 0.2]], [[0.6, 0.4], [0.4, 0.4]], [[1.2, 0.8], [0.8, 0.8]], [[2.4, 1.6], [1.6, 1.6]], [[6, 4], [4, 4]],[[9, 6], [6, 6]]])
    cov2 = np.array([[12, 8],[8, 8]])
    
    mean1 = np.array([[20, 15], [20, 15], [20, 15], [20, 15], [20, 15], [20.5, 15.5]])
    mean2 = [20, 15]
    
    #Set up target array
    target = np.zeros(100, dtype=int)
    target[0:50] = '1'
    
    dataset_list = []
    
    # counter for labelling dataset
    counter = 1