Пример #1
0
# Load the two weight tables returned by def_load_weight_pathways.
(df_weight_signaling,
 df_weight_metabolic_signaling) = tfm_data.def_load_weight_pathways()

# Index labels of each weight table; both loaders below are driven by them.
_signaling_index = list(df_weight_signaling.index.values)
_metabolic_signaling_index = list(df_weight_metabolic_signaling.index.values)

# The dataset loader gets the same labels with 'cell_type' prepended.
(df_paper_9437,
 df_signaling,
 df_metabolic_signaling) = tfm_data.def_load_dataset(
     ['cell_type'] + _signaling_index,
     ['cell_type'] + _metabolic_signaling_index,
     row_scaling=TYPE_OF_SCALING)

(df_weight_ppi_tf_signaling,
 df_weight_ppi_tf_metabolic_signaling) = tfm_data.def_load_weight_ppi_tf(
     _signaling_index,
     _metabolic_signaling_index)

# Place the PPI/TF weights and the pathway weights side by side (column-wise).
_weight_frames = [
    df_weight_ppi_tf_metabolic_signaling,
    df_weight_metabolic_signaling,
]
df_weight_both = pd.concat(_weight_frames, axis=1)
print('df_weight_both shape , ', df_weight_both.shape)

print('Normalization metabolic and signaling data')
df_ss = tfm_data.def_dataframe_normalize(
    df_metabolic_signaling, StandardScaler(), 'cell_type')

# Drop every frame that is not referenced again, together with the
# temporaries introduced above. df_weight_metabolic_signaling,
# df_weight_both and df_ss are kept on purpose.
del (df_paper_9437, df_signaling, df_weight_ppi_tf_signaling,
     df_weight_signaling)
del df_metabolic_signaling, df_weight_ppi_tf_metabolic_signaling
del _weight_frames, _signaling_index, _metabolic_signaling_index

# METABOLIC and SIGNALING PATHWAY

# EXPERIMENT DATASETS
print('the index of experiment dataset, ', list_experiments)
# The steps below run once per entry of list_experiments.
for i_experiment in list_experiments:
    # NOTE(review): the loop body appears truncated here -- the next line is
    # the tail of a statement whose beginning is missing, and df_paper /
    # df_weight_dense used further down are not defined in the visible code.
    # Confirm against the complete script.
          df_weight_paper_signaling_dense_pathway.shape)

    # Build the weight table for the metabolic + signaling pathways:
    #   1. turn the column labels of df_paper (first column skipped --
    #      presumably 'cell_type', TODO confirm) into a frame indexed by
    #      'Sample' (assumes the columns Index is named 'Sample');
    #   2. left-join df_weight_metabolic_signaling on the index, so labels
    #      without a weight row are kept and their missing values become 0;
    #   3. inner-join the result with df_weight_dense on the index.
    df_weight_paper_metabolic_signaling_dense_pathway = df_weight_dense.merge(
        pd.DataFrame(df_paper.columns[1:]).set_index('Sample').merge(
            df_weight_metabolic_signaling,
            left_index=True,
            right_index=True,
            how='left').fillna(0),
        left_index=True,
        right_index=True,
        how='inner')
    print('df_weight_paper_metabolic_signaling_dense_pathway shape  , ',
          df_weight_paper_metabolic_signaling_dense_pathway.shape)

    print('Normalization paper data - 9437 genes')
    # Rebinds df_ss: from here on it holds the normalized df_paper, replacing
    # whatever df_ss referred to before this statement.
    df_ss = tfm_data.def_dataframe_normalize(df_paper, StandardScaler(),
                                             'cell_type')
    #     df_mms = tfm_data.def_dataframe_normalize(df_paper, MinMaxScaler(), 'cell_type')

    # # ORIGINAL DATASET (9437 genes)

    # EXPERIMENT DATASETS
    # Accumulators for the train/test feature and target arrays.
    array_train_X_ss, array_train_y_ss = [], []
    array_test_X_ss, array_test_y_ss = [], []

    # The train index is the complete df_ss index and the test index is the
    # single label 0.
    # NOTE(review): if the helper selects rows by these labels, row 0 ends up
    # in both the train and the test split -- confirm this is intended.
    X_train_ss, X_test_ss, y_train_ss, y_test_ss = tfm_data.def_split_train_test_by_index(
        dataframe_=df_ss,
        train_index_=df_ss.index,
        test_index_=[0],
        target_feature_=target_)

    # Store the training features as a NumPy array.
    array_train_X_ss.append(np.array(X_train_ss))