Example #1
import os

import pandas as pd
from sklearn.model_selection import KFold


def def_export_index(name_of_dataset, df_, target_value, number_of_experiment):
    # Feature/target split (unused in this variant, kept for parity with
    # the leave-cells-out variant below).
    X_split = df_.loc[:, ~df_.columns.isin([target_value])]
    y_split = df_.loc[:, df_.columns.isin([target_value])]

    # tfm_data is a project-local helper module (not shown in this excerpt).
    path_output = tfm_data.def_check_create_path('EXPERIMENTS_kfold', '')

    # One KFold split per experiment; only the row indices are exported.
    kfold = KFold(n_splits=number_of_experiment, shuffle=True)
    list_export = [[train, test] for train, test in kfold.split(df_)]

    path_pickle = os.path.join(path_output, str(name_of_dataset) + '.pkl')
    pd.Series(list_export).to_pickle(path_pickle)
    print(path_pickle)
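The exported pickle is a pandas Series of [train, test] index pairs. A minimal sketch of how a downstream script might read the first variant's output back (the file name here is hypothetical):

import pandas as pd

# Each element of the pickled Series is a [train, test] pair of positional
# row indices produced by KFold.split.
splits = pd.read_pickle('EXPERIMENTS_kfold/my_dataset.pkl')  # hypothetical path
for train_idx, test_idx in splits:
    df_train, df_test = df_.iloc[train_idx], df_.iloc[test_idx]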
def def_export_index(name_of_dataset, df_, target_value, list_left_out,
                     number_of_experiment):
    X_split = df_.loc[:, ~df_.columns.isin([target_value])]
    y_split = df_.loc[:, df_.columns.isin([target_value])]
    path_output = tfm_data.def_check_create_path('EXPERIMENTS', '')
    for n_cells_out in list_left_out:
        print('n_cells_out:', n_cells_out)
        # Leave-n-cells-out splits, repeated number_of_experiment times.
        list_export = def_get_n_psplits(X_split, y_split, y_split['cell_type'],
                                        n_cells_out, number_of_experiment)
        path_pickle = os.path.join(
            path_output,
            str(name_of_dataset) + '_cell_out_' + str(n_cells_out) + '.pkl')
        pd.Series(list_export).to_pickle(path_pickle)
        print(path_pickle)
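def_get_n_psplits is not defined in this excerpt. A hypothetical reconstruction consistent with its call signature (features, target, group labels, cells to leave out, number of experiments), assuming the intent is to hold out n_cells_out whole cell types per split, could use sklearn's LeavePGroupsOut:

import numpy as np
from sklearn.model_selection import LeavePGroupsOut


def def_get_n_psplits(X, y, groups, n_cells_out, number_of_experiment):
    # Hypothetical sketch: each split holds out `n_cells_out` entire cell
    # types; at most `number_of_experiment` splits are kept at random.
    lpgo = LeavePGroupsOut(n_groups=n_cells_out)
    all_splits = [[train, test] for train, test in lpgo.split(X, y, groups)]
    rng = np.random.default_rng()
    n_keep = min(number_of_experiment, len(all_splits))
    picked = rng.choice(len(all_splits), size=n_keep, replace=False)
    return [all_splits[i] for i in picked]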
Example #3
import datetime as dt

from tensorflow.compat.v1 import ConfigProto, InteractiveSession

# Cap GPU memory usage so several jobs can share one card.
config = ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.45
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)

time_start = dt.datetime.now().time().strftime('%H:%M:%S')

# DEFAULT VALUES for PAPER DESIGN
epochs_default = 100
batch_size_default = 10

# TARGET VARIABLE NAME
target_ = 'cell_type'
TYPE_OF_SCALING = False

# THE LOCATION of THE RESULT of SCORE and MODEL
path_hyperband = tfm_data.def_check_create_path('kt_result',
                                                'DELETE_hyperband')
path_output_result = tfm_data.def_check_create_path(
    'kt_result', 'design_' + str(TYPE_OF_SCALING))
path_model = tfm_data.def_check_create_path('kt_result',
                                            'models_' + str(TYPE_OF_SCALING))

# IMPORT EXPERIMENT INDEX
path_experiments = os.path.join(os.path.dirname(os.getcwd()), 'data', 'EXPERIMENTS')
list_all_model = sorted(os.listdir(path_experiments))
list_experiments = [
    string for string in list_all_model
    if string.startswith('metabolic_signaling_cell_out_')
]
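These are the index files written by the leave-cells-out variant of def_export_index in Example #1; a matching name would look like metabolic_signaling_cell_out_2.pkl, where the numeric suffix is a hypothetical n_cells_out value.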

# LOADING REQUIRED DATASETS
time_start = dt.datetime.now().time().strftime('%H:%M:%S')

# DEFAULT VALUES for PAPER DESIGN
epochs_default = 100
batch_size_default = 10

# TARGET VARIABLE NAME
target_ = 'cell_type'
TYPE_OF_SCALING = [False, True]

for i_row_scaling in TYPE_OF_SCALING:
    TYPE_OF_EXPERIMENT = 'no_co_' + str(i_row_scaling)

    # THE LOCATION of THE RESULT of SCORE and MODEL
    path_model = tfm_data.def_check_create_path('NN_result',
                                                'models_' + TYPE_OF_EXPERIMENT)

    # LOADING REQUIRED DATASETS
    df_weight_signaling, df_weight_metabolic_signaling = tfm_data.def_load_weight_pathways()
    df_paper, df_signaling, df_metabolic_signaling = tfm_data.def_load_dataset(
        ['cell_type'] + list(df_weight_signaling.index.values),
        ['cell_type'] + list(df_weight_metabolic_signaling.index.values),
        row_scaling=i_row_scaling)

    df_weight_ppi_tf_signaling, df_weight_ppi_tf_metabolic_signaling = tfm_data.def_load_weight_ppi_tf(
        list(df_weight_signaling.index.values),
        list(df_weight_metabolic_signaling.index.values))
    df_weight_both = pd.concat(
        [df_weight_ppi_tf_metabolic_signaling, df_weight_metabolic_signaling],
        axis=1)
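pd.concat with axis=1 aligns the two weight frames on their row index with an outer join, so a gene present in only one frame silently produces NaNs. A quick sanity check, assuming both matrices are meant to share the same gene index:

    # Assumption: both weight matrices are indexed by the same gene identifiers.
    assert df_weight_ppi_tf_metabolic_signaling.index.equals(
        df_weight_metabolic_signaling.index), 'weight matrices are misaligned'
    assert not df_weight_both.isna().any().any()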
Example #5
time_start = dt.datetime.now().time().strftime('%H:%M:%S')

# DEFAULT VALUES for PAPER DESIGN
epochs_default = 100
batch_size_default = 10
dense_layer = 100

# TARGET VARIABLE NAME
target_ = 'cell_type'
TYPE_OF_SCALING = [False, True]

for i_row_scaling in TYPE_OF_SCALING:

    # THE LOCATION of THE RESULT of SCORE and MODEL
    path_output = tfm_data.def_check_create_path('NN_result_2',
                                                 'design_' + str(i_row_scaling))
    path_model = tfm_data.def_check_create_path('NN_result_2',
                                                'models_' + str(i_row_scaling))

    # IMPORT EXPERIMENT INDEX
    path_experiments = os.path.join(os.path.dirname(os.getcwd()), 'data', 'EXPERIMENTS')
    list_all_model = sorted(os.listdir(path_experiments))
    list_experiments = [
        string for string in list_all_model
        if string.startswith('default_cell_out')
    ]

    # LOADING REQUIRED DATASETS
    df_weight_signaling, df_weight_metabolic_signaling = tfm_data.def_load_weight_pathways()
    df_paper, df_signaling, df_metabolic_signaling = tfm_data.def_load_dataset(
        ['cell_type'] + list(df_weight_signaling.index.values),
        ['cell_type'] + list(df_weight_metabolic_signaling.index.values),
        row_scaling=i_row_scaling)  # follow the loop's scaling flag

    # Only df_paper is needed below; free the unused frames.
    del df_signaling
    del df_metabolic_signaling
Example #6
time_start = dt.datetime.now().time().strftime('%H:%M:%S')

# DEFAULT VALUES for PAPER DESIGN
epochs_default = 100
batch_size_default = 10
dense_layer = 100

# TARGET VARIABLE NAME
target_ = 'cell_type'
TYPE_OF_SCALING = [False, True]

for i_scaling in TYPE_OF_SCALING:

    # THE LOCATION of THE RESULT of SCORE and MODEL
    path_hyperband_ = tfm_data.def_check_create_path('kt_result', 'delete')
    path_output_result = tfm_data.def_check_create_path(
        'kt_result', 'design_no_co_' + str(i_scaling))
    path_model = tfm_data.def_check_create_path(
        'kt_result', 'models_no_co_' + str(i_scaling))

    # LOADING REQUIRED DATASETS
    df_weight_signaling, df_weight_metabolic_signaling = tfm_data.def_load_weight_pathways()
    df_paper, df_signaling, df_metabolic_signaling = tfm_data.def_load_dataset(
        ['cell_type'] + list(df_weight_signaling.index.values),
        ['cell_type'] + list(df_weight_metabolic_signaling.index.values),
        row_scaling=i_scaling,
        retrieval=False)

    df_weight_dense = pd.DataFrame(df_paper.columns[1:]).set_index('Sample')
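df_weight_dense relies on df_paper.columns being an Index named 'Sample': wrapping a named Index in a DataFrame yields a single column of that name, which set_index then promotes back, leaving an empty frame keyed by the feature (gene) names. A toy illustration of the mechanics:

import pandas as pd

# Toy columns: target first, then gene features; the Index name 'Sample'
# becomes the column name when the Index is wrapped in a DataFrame.
cols = pd.Index(['cell_type', 'GENE_A', 'GENE_B'], name='Sample')
df_genes = pd.DataFrame(cols[1:]).set_index('Sample')
print(df_genes.index.tolist())  # ['GENE_A', 'GENE_B']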
import datetime as dt

from sklearn.preprocessing import MinMaxScaler, StandardScaler

from tensorflow import keras
import tensorflow.keras.backend as K

import warnings
warnings.filterwarnings('ignore')

time_start = dt.datetime.now().time().strftime('%H:%M:%S')

# Clustering is designed with 20 experiments:
# the test size is 20% (i.e. a 5-fold KFold split),
# and the split is applied 4 times to reach the 20 experiments
# (see the sketch at the end of this example).

# THE LOCATION of THE RESULT
path_output = tfm_data.def_check_create_path(main_folder='clustering_result',
                                             sub_folder='')

# Loading required data
df_weight_pathway_signaling, df_weight_pathway_metabolic_signaling = tfm_data.def_load_weight_pathways()
df_paper, df_paper_signaling, df_paper_metabolic_signaling = tfm_data.def_load_dataset(
    ['cell_type'] + list(df_weight_pathway_signaling.index.values),
    ['cell_type'] + list(df_weight_pathway_metabolic_signaling.index.values),
    row_scaling=False,
    retrieval=False)

print('Normalization paper data')
df_scaler_ss = tfm_data.def_dataframe_normalize(df_paper, StandardScaler(),
                                                'cell_type')
print('Normalization signaling data')
df_scaler_ss_signaling = tfm_data.def_dataframe_normalize(
    df_paper_signaling, StandardScaler(), 'cell_type')
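A minimal sketch of the 20-experiment scheme described in the comment above (a 5-fold split repeated 4 times), using sklearn's RepeatedKFold; the variable names are illustrative:

from sklearn.model_selection import RepeatedKFold

# 5 folds x 4 repeats = 20 train/test experiments, each with a 20% test set.
rkf = RepeatedKFold(n_splits=5, n_repeats=4, random_state=0)
experiments = [[train, test] for train, test in rkf.split(df_scaler_ss)]
print(len(experiments))  # 20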