Python Preprocess.activity примеры использования

Язык программирования: Python
Пространство имен/Пакет: Preprocess
Класс/Тип: Preprocess
Метод/Функция: activity
Примеров на hotexamples.com: 1
Python Preprocess.activity — найден 1 пример. Это лучшие примеры кода на Python для Preprocess.Preprocess.activity, взятые из проектов с открытым исходным кодом. Вы можете оценивать каждый пример, чтобы помочь нам улучшить качество примеров.
Основные методы
Показать / Скрыть
Preprocess(30)
preprocess_a(3)
loadTs(2)
KMEANS(1)
OTSUThreshold(1)
PreprocessData(1)
_parseAllRawApks(1)
activity(1)
applyMask(1)
cap_outlier(1)
preprocess_ce(1)
Пример #1
Показать файл
Файл: Dataset.py Проект: timedreamer/OutPredict
    def loadData(self, input_dir, name_run, script_dir, data_type,
                 data_type_lo, delTmax, delTmin, tau, tfa_bool, timehorizon,
                 percent_LO_points, num_ets_lo, time_step, thres_coeff_var,
                 prior_type, prior_file):

        str_output = ""
        uniq_dups = []

        np.random.seed(self.rnd_seed)
        pps = Preprocess(self.rnd_seed)

        pps.delTmax = delTmax
        pps.delTmin = delTmin
        pps.tau = tau
        pps.input_dir = input_dir
        pps.str_output = str_output
        pps.flag_print = self.flag_print
        pps.priors_file = prior_file

        #IF CONDITIONS HAVE DUPLICATED NAMES, PRINT A META DATA FILE CALLED "meta_data_uniq.tsv" with only unique conds
        metadata_1 = pps.input_dataframe(pps.meta_data_file,
                                         has_index=False,
                                         strict=False)
        num_dups_conds = len(
            metadata_1.condName[metadata_1.condName.duplicated(keep=False)])

        if num_dups_conds > 0:
            uniq_dups = (metadata_1.condName[metadata_1.condName.duplicated(
                keep=False)]).unique()
            num_uniq_dups = len(uniq_dups)
            if self.flag_print:
                print("name of duplicated conds in meta data: ",
                      num_dups_conds)
                print("number of unique in dups conds", num_uniq_dups)
            metadata_1.set_index(['condName'], inplace=True)

            metadata_1_series = metadata_1.groupby(level=0).cumcount()
            metadata_1_series = "repet" + metadata_1_series.astype(str)
            metadata_1.index = metadata_1.index + metadata_1_series.replace(
                'repet0', '')
            #metadata_1.index = metadata_1.index + "_dup_"+ metadata_1.groupby(level=0).cumcount().astype(str).replace('0','')

            #The following code is to fix names of prevCol for duplicated conditions
            metadata_copy = metadata_1.copy()
            name_prev_cond = np.nan
            count = 0
            for index, row in (metadata_1[metadata_1.isTs == True]).iterrows():
                if (row['is1stLast'] == 'm') or (row['is1stLast'] == 'l'):
                    if row['prevCol'] != name_prev_cond:
                        if self.flag_print:
                            print(index, row)
                        metadata_copy.at[index, 'prevCol'] = name_prev_cond
                        count = count + 1
                name_prev_cond = index

            if self.flag_print:
                print(count)
            if count != num_dups_conds - num_uniq_dups:
                raise ValueError('Wrong meta data format')

            #metadata_copy.drop(['Unnamed: 0'], axis=1, inplace=True)
            metadata_copy.reset_index(inplace=True)
            metadata_copy.columns = [
                'condName', 'isTs', 'is1stLast', 'prevCol', 'del.t'
            ]
            cols = ['isTs', 'is1stLast', 'prevCol', 'del.t', 'condName']
            metadata_copy = metadata_copy[cols]

            pps.meta_data_file = "meta_data_uniq.tsv"
            path_file = pps.input_path(pps.meta_data_file)
            # metadata_copy.is1stLast = '"' + metadata_copy.is1stLast + '"'
            # metadata_copy.prevCol = '"' + metadata_copy.prevCol + '"'
            # metadata_copy.condName = '"' + metadata_copy.condName + '"'
            # metadata_copy.columns = ['"isTs"', '"is1stLast"', '"prevCol"', '"del.t"', '"condName"']
            metadata_copy.to_csv(path_file, sep="\t", index=False,
                                 na_rep='NA')  #, quoting=csv.QUOTE_NONE)

            #Add to expression file duplicated conds, this is important for how the leave-out section is implemented
            expression_1 = pps.input_dataframe(pps.expression_matrix_file,
                                               has_index=False,
                                               strict=False)
            count = 0
            for ud in uniq_dups:
                pattern = re.compile(ud + "repet" + "\d")
                for cond_tmp in metadata_copy.condName:
                    if pattern.match(cond_tmp):
                        expression_1[cond_tmp] = expression_1[ud]
                        count = count + 1

            if count != num_dups_conds - num_uniq_dups:
                raise ValueError('Wrong expression/meta_data format')

            col_arr = (np.asarray(expression_1.columns[1:]))
            expression_1.columns = np.insert(col_arr, 0, "")
            pps.expression_matrix_file = "expression_new.tsv"
            path_file = pps.input_path(pps.expression_matrix_file)
            expression_1.to_csv(path_file, sep="\t", index=False,
                                na_rep='NA')  #, quoting=csv.QUOTE_NONE)

        #END CODE FOR PRINTING NEW UNIQUE META DATA FILE AND NEW EXPRESSION FILE

        str_output = pps.get_data(thres_coeff_var, str_output, prior_type)

        pps.compute_common_data(uniq_dups, time_step)

        #CODE FOR LEAVE OUT DATA
        TS_vectors, steady_state_cond, index_steady_state, num_total_timeseries_points = self.readDatasetFromMetaDataFile(
            pps.meta_data)

        #Parse data to dynGenie3 format in case parse_4dyng3 is set to "True"

        # print pps.expression_matrix.head()
        # print pps.expression_matrix.index.tolist()
        # print pps.expression_matrix.loc["G1", :]

        if self.parse_4dyng3:
            #(TS_data,time_points,genes,TFs,alphas)

            # import sys
            # reload(sys)
            # sys.setdefaultencoding('utf8')
            print("Start parsing data to dynGenie3 format")
            TS_data = list()
            time_points = list()
            genes = pps.expression_matrix.index.tolist()
            genes = np.asarray(genes).astype(str)
            genes = genes.tolist()
            num_gene_names = len(genes)
            alphas = [0.02] * num_gene_names
            alphas = np.asarray(alphas).astype(float)
            alphas = alphas.tolist()

            for ts_tmp in TS_vectors:
                #for loop over a single timeseries

                ts_tmp_vect = list(ts_tmp.keys())

                num_time_points_intstmp = len(ts_tmp_vect)

                ts_dynGenie3 = np.zeros(
                    (num_time_points_intstmp, num_gene_names))
                ts_dynGenie3 = np.transpose(
                    pps.expression_matrix.loc[:, ts_tmp_vect])
                TS_data.append(np.asarray(ts_dynGenie3))

                time_points_i = np.zeros(num_time_points_intstmp)

                for j, key in enumerate(ts_tmp_vect):
                    time_points_i[j] = np.float(ts_tmp[key])

                time_points.append(time_points_i)

            # print TS_data
            # print type(TS_data[1])

            SS_data = np.transpose(pps.expression_matrix[steady_state_cond])

            #(TS_data,time_points,genes,TFs,alphas)
            TFs = np.asarray(pps.tf_names).astype(str)
            TFs = TFs.tolist()

            TS_data_file = "TS_data.pkl"
            path_file = pps.input_path(TS_data_file)
            with open(path_file, 'wb') as f:
                pickle.dump([TS_data, time_points, genes, TFs, alphas], f)
            # cPickle.dump(TS_data, f)
            # print type(TS_data)
            # cPickle.dump(time_points, f)
            # print type(time_points)
            # cPickle.dump(alphas, f)
            # print type(alphas)
            # cPickle.dump(genes, f)
            # print type(genes)
            f.close()
            # with open(output_path_estimators+'/Gene'+str(output_idx), 'rb') as f:
            #     treeEstimator = cPickle.load(f)
            SS_data_file = "SS_data.txt"
            path_file = pps.input_path(SS_data_file)
            SS_data.to_csv(path_file, sep="\t", index=False, na_rep='NA')
            print("End parsing data to dynGenie3 format")
            # # #END parse data to dynGenie3 format

        #Debug
        # pps.design.to_csv(os.path.abspath(os.path.join(pps.input_dir))+"/_design.txt", sep="\t")
        # pps.response.to_csv(os.path.abspath(os.path.join(pps.input_dir))+"/_response.txt", sep="\t")
        # pps.meta_data.to_csv(os.path.abspath(os.path.join(pps.input_dir))+"/_meta_data.txt", sep="\t")

        if data_type == "TS" or (data_type == "TS-SS" and
                                 (data_type_lo == "TS-SS"
                                  or data_type_lo == "TS")):
            if num_ets_lo > 0:
                ts_lopoints_x, ts_lopoints_y, timeseries_indices_lo = self.choose_LO_timeseries_random_withTimehorizon(
                    num_ets_lo, TS_vectors, timehorizon)
            else:
                ts_lopoints_x, ts_lopoints_y, t0_lopoints, timeseries_indices_lo = self.choose_timeseries_LO_lastPoints_random_withTimehorizon(
                    percent_LO_points, num_total_timeseries_points, TS_vectors,
                    timehorizon)

        if data_type == "SS" or (data_type == "TS-SS" and
                                 (data_type_lo == "TS-SS"
                                  or data_type_lo == "SS")):
            ss_lo_cond_names = list()
            ss_lo_cond_names = np.asarray(ss_lo_cond_names)
            ss_lo_indices = list()
            ss_lo_indices = np.asarray(ss_lo_indices)

            if len(steady_state_cond) > 0:
                ss_lo_cond_names, ss_lo_indices = self.choose_steadystate_LO_points_random(
                    percent_LO_points, steady_state_cond)

        #Debug
        # print "num_total_timeseries_points", num_total_timeseries_points
        # print "len(ss_lo_cond_names)", len(steady_state_cond)
        # print "len(pps.meta_data)", len(pps.meta_data)

        #TS_vectors, steady_state_cond, index_steady_state, num_total_timeseries_points
        # TS_vectors [OrderedDict([('S0_1', 0),
        #               ('S1_1', 60.0),
        #               ('S2_1', 120.0),
        #               ('S3_1', 180.0),
        #               ('S4_1', 240.0),
        #               ('S5_1', 300.0),
        #               ('S6_1', 360.0)]),
        #  OrderedDict([('S0_2', 0),
        #               ('S1_2', 60.0),
        #               ('S2_2', 120.0),
        #               ('S3_2', 180.0),
        #               ('S4_2', 240.0),
        #               ('S5_2', 300.0),
        #               ('S6_2', 360.0)]),......]
        # steady_state_cond
        # array(['LBexp_1', 'LBexp_2', 'LBexp_3',....]

        # index_steady_state
        # array([163, 164, 165, 166, 167,....]

        # num_total_timeseries_points
        # 163

        #Leave-out Time-series points
        #ts_lopoints_x, ts_lopoints_y, timeseries_indices_lo
        # timeseries_indices_lo left out
        # array([31, 15, 26, 17])
        # ts_lopoints_x, ts_lopoints_y
        # OrderedDict([('MG+90_2', 95.0), ('SMM_1', 0), ('dia5_3', 5.0), ('SMM_3', 0)])
        # OrderedDict([('MG+120_2', 125.0), ('Salt_1', 10.0), ('dia15_3', 15.0), ('Salt_3', 10.0)])

        #Leave-out Steady state points
        #ss_lo_cond_names, ss_lo_indices
        # array(['H2O2_1', 'LBGexp_2', 'LBtran_2', ....]
        # array([100,  10,   4,  81,  97,  65, ... ]

        if self.flag_print:
            print("Shape of design var before leaving-out data: ",
                  str(pps.design.shape))
            print("Shape of response var before leaving-out data: ",
                  str(pps.response.shape))

        str_output = str_output + "Shape of design var before leaving-out data: " + str(
            pps.design.shape) + "\n"
        str_output = str_output + "Shape of response var before leaving-out data: " + str(
            pps.response.shape) + "\n"

        #Debug
        # w = csv.writer(open("ts_lopoints_x.csv", "w"))
        # for key, val in ts_lopoints_x.items():
        #     w.writerow([key, val])

        # pps.design.to_csv(os.path.abspath(os.path.join(pps.input_dir))+"/_design.txt", sep="\t")
        # pps.response.to_csv(os.path.abspath(os.path.join(pps.input_dir))+"/_response.txt", sep="\t")

        #Before splitting the dataset in training and test, check if want to learn on SS only or TS only
        if data_type == "SS":
            str_output = str_output + "::::::::STEADY-STATE ONLY - LOOK AT JUST THE SHAPES OF DESIGN AND RESPONSE VARIABLES" + "\n"
            only_steady_state_indxes = (
                pps.design.columns.isin(steady_state_cond))
            pps.design = pps.design.loc[:,
                                        only_steady_state_indxes]  #, axis=1, inplace=True)
            pps.response = pps.response.loc[:,
                                            only_steady_state_indxes]  #, axis=1, inplace=True)
            pps.half_tau_response = pps.half_tau_response.loc[:,
                                                              only_steady_state_indxes]

            pps.delta_vect = pps.delta_vect.loc[:, (
                pps.delta_vect.columns.isin(steady_state_cond)
            )]  #, axis=1, inplace=True)

        if data_type == "TS":
            str_output = str_output + "::::::::TIME-SERIES ONLY - LOOK AT JUST THE SHAPES OF DESIGN AND RESPONSE VARIABLES" + "\n"
            pps.design.drop(steady_state_cond, axis=1, inplace=True)
            pps.response.drop(steady_state_cond, axis=1, inplace=True)
            pps.half_tau_response.drop(steady_state_cond, axis=1, inplace=True)

            pps.delta_vect.drop(steady_state_cond, axis=1, inplace=True)

        # print "Shape of design design before splitting: "+str(pps.design.shape)
        # print "Shape of response response before splitting: "+str(pps.response.shape)
        #
        # design_tmp = pps.design
        # tfs_tmp = list(set(pps.tf_names).intersection(pps.expression_matrix.index))
        # X_tmp = np.asarray(design_tmp.loc[tfs_tmp,:].values)
        # X_tmp = (X_tmp - (X_tmp.mean(axis=1)).reshape(-1,1)) / (X_tmp.std(axis=1)).reshape(-1,1)
        # design_tmp_2 = pd.DataFrame(X_tmp ,index = tfs_tmp, columns = design_tmp.columns)
        # pps.design = design_tmp_2
        #
        # print "Shape of design after normalization/standardization: ", pps.design.shape
        #
        # response_tmp = pps.response
        # Y_tmp = np.asarray(response_tmp.values)
        # Y_tmp = (Y_tmp - (Y_tmp.mean(axis=1)).reshape(-1,1)) / (Y_tmp.std(axis=1)).reshape(-1,1)
        # response_tmp_2 = pd.DataFrame(Y_tmp ,index = response_tmp.index, columns = response_tmp.columns)
        # pps.response = response_tmp_2
        #
        # print "Shape of response after normalization/standardization: ", pps.response.shape

        if data_type == "SS" or (data_type == "TS-SS" and
                                 (data_type_lo == "TS-SS"
                                  or data_type_lo == "SS")):
            #Leaving out Steady state points
            pps.leave_out_ss_design = pps.design[ss_lo_cond_names]
            pps.design.drop(ss_lo_cond_names, axis=1, inplace=True)
            pps.leave_out_ss_response = pps.response[ss_lo_cond_names]
            pps.response.drop(ss_lo_cond_names, axis=1, inplace=True)
            pps.half_tau_response.drop(ss_lo_cond_names, axis=1, inplace=True)
            if self.flag_print:
                print("Shape of leave out SS design var: ",
                      pps.leave_out_ss_design.shape)
                print("Shape of leave out SS response var: ",
                      pps.leave_out_ss_response.shape)

            pps.delta_vect.drop(ss_lo_cond_names, axis=1, inplace=True)

        if data_type == "TS" or (data_type == "TS-SS" and
                                 (data_type_lo == "TS-SS"
                                  or data_type_lo == "TS")):
            #Leaving out Time series points
            pps.leave_out_ts_design = pps.design[list(ts_lopoints_x.keys())]
            pps.design.drop(list(ts_lopoints_x.keys()), axis=1, inplace=True)
            pps.leave_out_ts_response = pps.response[list(
                ts_lopoints_x.keys())]
            pps.response.drop(list(ts_lopoints_x.keys()), axis=1, inplace=True)
            pps.half_tau_response.drop(list(ts_lopoints_x.keys()),
                                       axis=1,
                                       inplace=True)
            if self.flag_print:
                print("Shape of leave out TS design var: ",
                      pps.leave_out_ts_design.shape)
                print("Shape of leave out TS response var: ",
                      pps.leave_out_ts_response.shape)

            pps.delta_vect.drop(list(ts_lopoints_x.keys()),
                                axis=1,
                                inplace=True)

        if self.flag_print:
            print("Shape of design var after leaving-out data: ",
                  pps.design.shape)
            print("Shape of response var after leaving-out data: ",
                  pps.response.shape)

        str_output = str_output + "Shape of design var after leaving-out data: " + str(
            pps.design.shape) + "\n"
        str_output = str_output + "Shape of response var after leaving-out data: " + str(
            pps.response.shape) + "\n"

        if data_type == "SS" or (data_type == "TS-SS" and
                                 (data_type_lo == "TS-SS"
                                  or data_type_lo == "SS")):
            str_output = str_output + "Shape of leave out SS design var: " + str(
                pps.leave_out_ss_design.shape) + "\n"
            str_output = str_output + "Shape of leave out SS response var: " + str(
                pps.leave_out_ss_response.shape) + "\n"

        if data_type == "TS" or (data_type == "TS-SS" and
                                 (data_type_lo == "TS-SS"
                                  or data_type_lo == "TS")):
            str_output = str_output + "Shape of leave out TS design var: " + str(
                pps.leave_out_ts_design.shape) + "\n"
            str_output = str_output + "Shape of leave out TS response var: " + str(
                pps.leave_out_ts_response.shape) + "\n"

        #END CODE FOR LEAVE OUT DATA

        if data_type == "SS" or (data_type == "TS-SS" and
                                 (data_type_lo == "TS-SS"
                                  or data_type_lo == "SS")):
            steady_state_cond_new = list(steady_state_cond.copy())
            for element in ss_lo_cond_names:
                steady_state_cond_new.remove(element)
        else:
            steady_state_cond_new = steady_state_cond

        index_steady_state_new = []
        indexes_all = list(range(0, len(pps.design.columns)))
        delta_vect = list()
        #Debug
        #print len(indexes_all)
        if data_type == "SS" or data_type == "TS-SS":
            for element in steady_state_cond_new:
                index_steady_state_new.append(
                    pps.design.columns.get_loc(element))
            index_steady_state_new = np.asarray(index_steady_state_new)

        index_time_points_new = []
        if data_type == "TS" or data_type == "TS-SS":
            index_time_points_new = set(indexes_all) - set(
                index_steady_state_new)
            index_time_points_new = np.asarray(list(index_time_points_new))

        #Debug
        #print len(index_time_points_new)
        #print len(index_steady_state_new)

        #Debug
        # print "pps.priors_data.shape", pps.priors_data.shape
        # print "len(pps.priors_data.abs().sum(axis=0))", len(pps.priors_data.abs().sum(axis=0))
        # print "len(pps.priors_data.abs().sum(axis=0))", len(pps.priors_data.abs().sum(axis=1))
        # print "len(pps.priors_data.sum(axis=0))", len(pps.priors_data.sum(axis=0))
        # print "type(np.abs(pps.priors_data))", type(np.abs(pps.priors_data))
        # pps.priors_data.to_csv(os.path.abspath(os.path.join(pps.input_dir))+"/_ppspriors_data.txt", sep="\t")
        # pps.gold_standard.to_csv(os.path.abspath(os.path.join(pps.input_dir))+"/_ppsgold_standard.txt", sep="\t")
        # print type(pps.gold_standard)
        # pps.design.to_csv(os.path.abspath(os.path.join(pps.input_dir))+"/_design.txt", sep="\t")
        # pps.response.to_csv(os.path.abspath(os.path.join(pps.input_dir))+"/_response.txt", sep="\t")

        if prior_type == "binary_all":
            num_edges_prior = np.sum(pps.priors_data.values != 0)
        num_edges_gs = np.sum(pps.gold_standard.values != 0)
        if self.flag_print:
            if prior_type == "binary_all":
                print("Number of edges in the prior: ", num_edges_prior,
                      pps.priors_data.shape)
            print(
                "Number of edges in the evaluation part of the gold standard: ",
                num_edges_gs, pps.gold_standard.shape)
        if prior_type == "binary_all":
            str_output = str_output + "Number of edges in the prior: " + str(
                num_edges_prior) + str(pps.priors_data.shape) + "\n"
        str_output = str_output + "Number of edges in the evaluation part of the gold standard: " + str(
            num_edges_gs) + str(pps.gold_standard.shape) + "\n"

        # print "pps.activity.shape", pps.activity.shape
        # print pps.expression_matrix.shape
        # print len(pps.tf_names)
        # print pps.gold_standard.shape
        # print pps.response.shape

        if tfa_bool:
            #compute_activity()
            # """
            # Compute Transcription Factor Activity
            # """
            if self.flag_print:
                print('Computing Transcription Factor Activity ... ')
            tfs = list(
                set(pps.tf_names).intersection(pps.expression_matrix.index))
            #TFA_calculator = TFA(pps.priors_data, pps.design, pps.half_tau_response, tfs)
            pps.activity = pps.compute_transcription_factor_activity(tfs)
            #pps.activity, pps.priors_data= TFA_calculator.compute_transcription_factor_activity()

        else:
            if self.flag_print:
                print(
                    'Using just expression, NO Transcription Factor Activity')
            expression_matrix = pps.design
            tfs = list(
                set(pps.tf_names).intersection(pps.expression_matrix.index))
            activity = pd.DataFrame(expression_matrix.loc[tfs, :].values,
                                    index=tfs,
                                    columns=expression_matrix.columns)
            if self.flag_print:
                print(('Design matrix of shape: {}'.format(activity.shape)))
            pps.activity = activity

        tf_names = pps.activity.index.tolist(
        )  #pps.priors_data.columns #pps.tf_names

        #Leave-out SS
        if data_type == "SS" or (data_type == "TS-SS" and
                                 (data_type_lo == "TS-SS"
                                  or data_type_lo == "SS")):
            expression_matrix_lo_ss = pps.leave_out_ss_design
            leave_out_ss_design = pd.DataFrame(
                expression_matrix_lo_ss.loc[tf_names, :].values,
                index=tf_names,
                columns=expression_matrix_lo_ss.columns)
            pps.leave_out_ss_design = leave_out_ss_design

        #Leave-out TS
        if data_type == "TS" or (data_type == "TS-SS" and
                                 (data_type_lo == "TS-SS"
                                  or data_type_lo == "TS")):
            expression_matrix_lo_ts = pps.leave_out_ts_design
            leave_out_ts_design = pd.DataFrame(
                expression_matrix_lo_ts.loc[tf_names, :].values,
                index=tf_names,
                columns=expression_matrix_lo_ts.columns)
            pps.leave_out_ts_design = leave_out_ts_design

        expression = pps.expression_matrix  #this is the initial one but then there is filtering and stuff

        goldstandard = pps.gold_standard
        genelist = pps.response.index.tolist(
        )  #pps.expression_matrix.index.tolist()
        numtfs = len(tf_names)

        X = pps.activity.transpose().values  #X [n_samples, n_features]
        y = pps.response.transpose().values  #y [n_samples, num_genes]

        if self.flag_print:
            print("Shape of design var X: " + str(X.shape))
            print("Shape of response var Y: " + str(y.shape))
        str_output = str_output + "Shape of design var X: " + str(
            X.shape) + "\n"
        str_output = str_output + "Shape of response var Y: " + str(
            y.shape) + "\n"

        if self.flag_print:
            print("X False", np.any(np.isnan(X)))

            print("X True", np.all(np.isfinite(X)))

            print("y False", np.any(np.isnan(y)))

            print("y True", np.all(np.isfinite(y)))

        X = np.float64(X)

        y = np.float64(y)

        output_path = script_dir + "/output/" + name_run + "_numgenes" + str(
            len(genelist)) + "_numtfs" + str(numtfs)

        if not os.path.exists(output_path):
            os.makedirs(output_path)
        # else:
        # 	if self.poot or not(self.auto_meth):
        # 		num_folders = len([name for name in os.listdir(script_dir+"/output/") if
        # 							   os.path.isdir(os.path.join(script_dir+"/output/",name)) and (name_run+"_numgenes"+str(len(genelist))+"_numtfs"+str(numtfs)) in name])
        # 		os.makedirs(output_path + "_" + str(num_folders))
        # 		output_path = output_path + "_" + str(num_folders)

        if prior_type == "binary_all":
            if not os.path.exists(input_dir + "/priors"):
                os.makedirs(input_dir + "/priors")

        if prior_type == "binary_all":
            #Save plot of prior number of targets for each TF distribution
            priors_data_tmp = np.abs(pps.priors_data)
            index_tmp = priors_data_tmp.sum(axis=0) != 0
            prior_num_tfs = np.sum(index_tmp)
            #Debug print TFs
            #print priors_data_tmp.columns[index_tmp]
            #Debug #print priors_data_tmp.sum(axis=0)[index_tmp]
            max_outdegree = np.max(priors_data_tmp.sum(axis=0)[index_tmp])
            #Debug #print "max_outdegree", max_outdegree
            max_outdegree = np.int(max_outdegree)
            out_prior_tfs_outdegrees = "Num of TFs in prior: " + str(
                prior_num_tfs
            ) + " Mean and var of targets for TFs in prior: " + str(
                np.mean(priors_data_tmp.sum(axis=0)[index_tmp])) + " , " + str(
                    np.std(priors_data_tmp.sum(axis=0)[index_tmp]))
            str_output = str_output + out_prior_tfs_outdegrees + "\n"
            ax = priors_data_tmp.sum(axis=0)[index_tmp].plot(
                kind="hist", bins=list(range(0, max_outdegree + 1)))
            ax.set_title("Prior outdegrees distribution")
            ax.set_xlabel("outdegree of TFs ( i.e. TFs num of targets)")
            if self.flag_print:
                plt.savefig(output_path +
                            "/Prior outdegrees distribution_numTFs" +
                            str(prior_num_tfs) + "_numEdges" +
                            str(num_edges_prior))
            plt.close()

        #Save plot of Eval GS number of targets for each TF distribution
        gold_standard_tmp = np.abs(pps.gold_standard)
        index_tmp2 = gold_standard_tmp.sum(axis=0) != 0
        gs_num_tfs = np.sum(index_tmp2)
        max_outdegree2 = np.max(gold_standard_tmp.sum(axis=0)[index_tmp2])
        max_outdegree2 = np.int(max_outdegree2)
        #Debug #print gold_standard_tmp.sum(axis=0)[index_tmp2]
        #Debug #print max_outdegree2
        out_gs_tfs_outdegrees = "Num of TFs in eval gold standard: " + str(
            gs_num_tfs
        ) + " Mean and var of targets for TFs in eval GS: " + str(
            np.mean(gold_standard_tmp.sum(axis=0)[index_tmp2])) + " , " + str(
                np.std(gold_standard_tmp.sum(axis=0)[index_tmp2]))
        str_output = str_output + out_gs_tfs_outdegrees + "\n"
        #Debug print TFs
        #print gold_standard_tmp.columns[index_tmp2]
        ax1 = gold_standard_tmp.sum(axis=0)[index_tmp2].plot(
            kind="hist", bins=list(range(0, max_outdegree2 + 1)))
        ax1.set_title("Eval Gold standard outdegrees distribution")
        ax1.set_xlabel("outdegree of TFs ( i.e. TFs num of targets)")
        if self.flag_print:
            plt.savefig(output_path +
                        "/Eval Gold standard outdegrees distribution_numTFs" +
                        str(gs_num_tfs) + "_numEdges" + str(num_edges_gs))
        plt.close()

        if prior_type == "binary_all":
            #Write gold standard priors to file
            pps.priors_data.to_csv(input_dir + "/priors/" + prior_file,
                                   sep="\t")

        if self.flag_print:
            outfile = open(output_path + "/_preprocessing.txt", 'w')
            outfile.write("Run name: " + str(name_run) + "\n")
            outfile.write(str_output)

        if data_type == "SS" or (data_type == "TS-SS" and
                                 (data_type_lo == "TS-SS"
                                  or data_type_lo == "SS")):
            if len(steady_state_cond) > 0:
                #Debug
                if self.flag_print:
                    print("Leave-out points for steady state: ",
                          ss_lo_cond_names, ss_lo_indices)
                    outfile.write("Leave-out points for steady state: " +
                                  str(ss_lo_cond_names) + str(ss_lo_indices) +
                                  "\n")

        if data_type == "TS" or (data_type == "TS-SS" and
                                 (data_type_lo == "TS-SS"
                                  or data_type_lo == "TS")):
            if self.flag_print:
                print("Leave-out points for timeseries: ", ts_lopoints_x,
                      ts_lopoints_y, timeseries_indices_lo)
                outfile.write("Leave-out points for timeseries: " +
                              str(ts_lopoints_x) + str(ts_lopoints_y) +
                              str(timeseries_indices_lo) + "\n")

        # print "New dimensions after coeff of var filter..."
        # outfile.write("New dimensions after coeff of var filter... \n")
        if self.flag_print:
            print("Expression dim: ", expression.shape)
            outfile.write("Expression dim: " + str(expression.shape) + "\n")
        if self.flag_print:
            print("Num of tfs: ", len(tf_names))
            outfile.write("Num of tfs: " + str(len(tf_names)) + "\n")
        if self.flag_print:
            print("Num of genes: ", len(genelist))
            outfile.write("Num of genes: " + str(len(genelist)) + "\n")
        if self.flag_print:
            if prior_type == "binary_all":
                print("Priors dim: ", pps.priors_data.shape)
                outfile.write("Priors dim: " + str(pps.priors_data.shape) +
                              "\n")
        if self.flag_print:
            print("Goldstandard dim: ", goldstandard.shape)
            outfile.write("Goldstandard dim: " + str(goldstandard.shape) +
                          "\n")

        #Print INFO to log file
        if self.flag_print:
            print("The number of genes is: ", len(genelist))
            outfile.write("The number of genes is: " + str(len(genelist)) +
                          "\n")
        if self.flag_print:
            print("The number of TFs is: ", len(tf_names))
            outfile.write("The number of TFs is: " + str(len(tf_names)) + "\n")
        if self.flag_print:
            print("The total Number of data points in the dataset is: ",
                  len(pps.meta_data))
            outfile.write(
                "The total Number of data points in the dataset is: " +
                str(len(pps.meta_data)) + "\n")
        if self.flag_print:
            print("The total number of time series is: ", len(TS_vectors))
            outfile.write("The total number of time series is: " +
                          str(len(TS_vectors)) + "\n")
        if self.flag_print:
            print("The number of total time points is: ",
                  num_total_timeseries_points)
            outfile.write("The number of total time points is: " +
                          str(num_total_timeseries_points) + "\n")
        if self.flag_print:
            print("The number of total steady state points is: ",
                  len(steady_state_cond))
            outfile.write("The number of total steady state points is: " +
                          str(len(steady_state_cond)) + "\n")

        if data_type == "SS" or (data_type == "TS-SS" and
                                 (data_type_lo == "TS-SS"
                                  or data_type_lo == "SS")):
            if self.flag_print:
                print(
                    "The percentage of leave-out steady state points is: ",
                    str(100 * float(len(ss_lo_indices)) /
                        len(steady_state_cond)))
                outfile.write(
                    "The percentage of leave-out steady state points is: " +
                    str(100 * float(len(ss_lo_indices)) /
                        len(steady_state_cond)) + "\n")

        if data_type == "TS" or (data_type == "TS-SS" and
                                 (data_type_lo == "TS-SS"
                                  or data_type_lo == "TS")):
            if self.flag_print:
                print(
                    "The percentage of leave-out time series points is: ",
                    str(100 * float(len(timeseries_indices_lo)) /
                        num_total_timeseries_points))
                outfile.write(
                    "The percentage of leave-out time series points is: " +
                    str(100 * float(len(timeseries_indices_lo)) /
                        num_total_timeseries_points) + "\n")
                outfile.close()

        #All variables that can be returned if necessary
        # (All points)
        # TS_vectors, steady_state_cond, num_total_timeseries_points

        # #Training and leave out points
        # index_time_points_new, index_steady_state_new, pps.leave_out_ss_design(X_test_ss), pps.leave_out_ss_response, pps.leave_out_ts_design, pps.leave_out_ts_response

        # #leave out points
        # ss_lo_cond_names, ts_lopoints_x, ts_lopoints_y, timeseries_indices_lo

        if data_type == "SS" or (data_type == "TS-SS" and
                                 (data_type_lo == "TS-SS"
                                  or data_type_lo == "SS")):

            X_test_ss = pps.leave_out_ss_design.transpose().values

            y_test_ss = pps.leave_out_ss_response.transpose().values
        else:
            X_test_ss = ""
            y_test_ss = ""

        deltas = []
        if data_type == "TS" or (data_type == "TS-SS" and
                                 (data_type_lo == "TS-SS"
                                  or data_type_lo == "TS")):
            X_test_ts = pps.leave_out_ts_design.transpose().values

            y_test_ts = pps.leave_out_ts_response.transpose().values

            ts_lopoints_y_keys = list(ts_lopoints_y.keys())

            for i, k in enumerate(ts_lopoints_x.keys()):
                # #Debug
                # #print "ts_lopoints_x[k]", ts_lopoints_x[k]
                # if float((ts_lopoints_x[k])) == 0:
                # 	log_of_frac = 1
                # else:
                # 	#No log
                # 	#log_of_frac = float(ts_lopoints_y[ts_lopoints_y_keys[i]]) / float((ts_lopoints_x[k]))
                #
                # 	log_of_frac = np.log(float(ts_lopoints_y[ts_lopoints_y_keys[i]]) / float((ts_lopoints_x[k])))
                #deltas.append(log_of_frac)

                #Original
                deltas.append(ts_lopoints_y[ts_lopoints_y_keys[i]] -
                              (ts_lopoints_x[k]))

            y_test_ts_future_timepoint = pps.expression_matrix.loc[
                genelist, ts_lopoints_y_keys].transpose().values

            x_test_ts_current_timepoint = pps.expression_matrix.loc[
                genelist, list(ts_lopoints_x.keys())].transpose().values

            x_test_ts_timepoint0 = pps.expression_matrix.loc[
                genelist, list(t0_lopoints.keys())].transpose().values

        else:
            X_test_ts = ""
            y_test_ts = ""
            y_test_ts_future_timepoint = ""
            x_test_ts_current_timepoint = ""
            x_test_ts_timepoint0 = ""

        #Debug
        #print y_test_ts_future_timepoint
        #print x_test_ts_current_timepoint

        return X, y, genelist, tf_names, goldstandard, output_path, pps.priors_data, X_test_ss, X_test_ts, y_test_ss, y_test_ts, x_test_ts_current_timepoint, y_test_ts_future_timepoint, deltas, x_test_ts_timepoint0, index_steady_state_new, index_time_points_new, pps.design, pps.delta_vect, pps.res_mat2