Пример #1
0
        # Restrict the orient-only CTCF reader to records that carry orientation
        # data (presumably filters in place -- TODO confirm in the reader class).
        params.ctcf_reader_orientOnly.keep_only_with_orient_data()
        # Predictor generator fed by the orient-only CTCF reader; the second
        # positional argument is the configured window size.
        OrientBlocksCTCFpg = OrientBlocksPredictorGenerator(params.ctcf_reader_orientOnly,
                                                             params.window_size)
        # Predictor generator fed by params.ctcf_reader (not the orient-only
        # reader), with the window size reused as its bin size.
        ConvergentPairPG = ConvergentPairPredictorGenerator(params.ctcf_reader, binsize=params.window_size)

        # Read the other ChIP-seq tracks: one reader and one
        # SmallChipSeqPredictorGenerator per row of peaks/filenames.csv.
        # Paths are built by plain string concatenation, so input_folder must
        # already end with a path separator.
        logging.info('create chipPG')
        chipPG = []
        filenames_df = pd.read_csv(input_folder + "peaks/filenames.csv")
        # Disabled sanity check comparing the number of entries in peaks/
        # (minus one, presumably the index file itself) with the number of
        # rows in the index, plus the debug prints that went with it.
        # assert len(os.listdir(input_folder + 'peaks/')) - 1 == len(filenames_df['name'])
        # print(len(os.listdir(input_folder + 'peaks/')))
        # print(len(filenames_df['name']))
        # Disabled whitelist of track names, used only by the commented-out
        # `if` inside the loop and the commented-out assert after it.
        # proteins=set(["RAD21", "SMC3", "POLR2A", "H3K27ac", "H3K27me3", "DNase-seq", "H3K9me3", "H3K4me1", "H3K4me2", "H3K4me3", "YY1"])
        for index, row in filenames_df.iterrows():
            # The loop body keeps the extra indentation level left over from
            # this disabled filter, so re-enabling it needs no re-indent.
            # if row["name"] in proteins:
                # The index stores the file name without the ".gz" suffix that
                # is appended here; row["name"] labels the reader.
                # params.chip_reader is overwritten on every iteration, so after
                # the loop it refers to the last track only.
                params.chip_reader = ChiPSeqReader(input_folder + 'peaks/' + row["filename"] + '.gz', name=row['name'])
                params.chip_reader.read_file()
                # Each generator is constructed with its own freshly read reader.
                # N_closest=4: presumably the number of nearest peaks used per
                # locus -- TODO confirm in SmallChipSeqPredictorGenerator.
                chipPG.append(SmallChipSeqPredictorGenerator(params.chip_reader,params.window_size,N_closest=4))
        # assert len(chipPG)==len(proteins)
        # #
        # Read methylation data: one reader and one
        # SmallChipSeqPredictorGenerator per row of methylation/filenames.csv.
        # Paths are built by plain string concatenation, so input_folder must
        # already end with a path separator.
        logging.info('create metPG')
        metPG = []
        # NOTE(review): `filemanes_df` looks like a typo of `filenames_df`; it
        # is left unchanged here because code outside this excerpt may use it.
        filemanes_df = pd.read_csv(input_folder + "methylation/filenames.csv")
        # NOTE(review): the disabled assert below still refers to 'peaks/' and
        # `filenames_df` (the ChIP-seq index), not to the methylation index.
        # assert len(os.listdir(input_folder + 'peaks/')) - 1 == len(filenames_df['name'])
        for index, row in filemanes_df.iterrows():
            #print(row["name"])
            # Methylation tracks are loaded with the same ChiPSeqReader class
            # used for ChIP-seq peaks; unlike the peaks loop, no ".gz" suffix
            # is appended to the file name.
            # params.met_reader is overwritten on every iteration, so after the
            # loop it refers to the last track only.
            params.met_reader = ChiPSeqReader(input_folder + 'methylation/'+ row["filename"], name=row['name'])
            # Columns "0", "1", "2" and "4" are renamed to chr/start/end/sigVal
            # (presumably the files have no header row -- TODO confirm).
            params.met_reader.read_file(renamer={"0":"chr","1":"start","2":"end","4":"sigVal"})
            # N_closest=4: presumably the number of nearest records used per
            # locus -- TODO confirm in SmallChipSeqPredictorGenerator.
            metPG.append(SmallChipSeqPredictorGenerator(params.met_reader,params.window_size,N_closest=4))
        #Read cage data
Пример #2
0
            params.ctcf_reader_orientOnly, params.window_size)
        # Predictor generator fed by params.ctcf_reader, with the configured
        # window size reused as its bin size.
        ConvergentPairPG = ConvergentPairPredictorGenerator(
            params.ctcf_reader, binsize=params.window_size)

        # Read the other ChIP-seq tracks: one reader and one
        # SmallChipSeqPredictorGenerator per row of H1/Chip-seq/filenames.csv.
        # Paths are built by plain string concatenation, so input_folder must
        # already end with a path separator.
        logging.info('create chipPG')
        chipPG = []
        filenames_df = pd.read_csv(input_folder + "H1/Chip-seq/filenames.csv")
        # NOTE(review): the disabled checks below still refer to 'peaks/',
        # while this code reads from 'H1/Chip-seq/'.
        # assert len(os.listdir(input_folder + 'peaks/')) - 1 == len(filenames_df['name'])
        # print(len(os.listdir(input_folder + 'peaks/')))
        # print(len(filenames_df['name']))
        # Disabled whitelist of track names, used only by the commented-out
        # `if` inside the loop.
        # proteins=set(["RAD21", "SMC3", "POLR2A", "H3K27ac", "H3K27me3", "DNase-seq", "H3K9me3", "H3K4me1", "H3K4me2", "H3K4me3", "YY1"])
        for index, row in filenames_df.iterrows():
            # if row["name"] in proteins:
            # row["filename"] is used as-is (no suffix is appended) and
            # row["name"] labels the reader. params.chip_reader is overwritten
            # on every iteration, so after the loop it refers to the last
            # track only.
            params.chip_reader = ChiPSeqReader(input_folder + 'H1/Chip-seq/' +
                                               row["filename"],
                                               name=row['name'])
            params.chip_reader.read_file()
            # Each generator is constructed with its own freshly read reader.
            # N_closest=4: presumably the number of nearest peaks used per
            # locus -- TODO confirm in SmallChipSeqPredictorGenerator.
            chipPG.append(
                SmallChipSeqPredictorGenerator(params.chip_reader,
                                               params.window_size,
                                               N_closest=4))

        # #Read RNA-Seq data
        # params.RNAseqReader = RNAseqReader(fname=input_folder + "RNA-seq/rna-seqPolyA.tsvpre.txt",
        #                                    name="RNA")
        # params.RNAseqReader.read_file(rename={ "Gene name": "gene",
        #                       "Gene start (bp)": "start",
        #                       "Gene end (bp)": "end",
        #                       "Chromosome/scaffold name": "chr",
        #                       "FPKM": "sigVal"},