# Attach orientation data to the CTCF sites, then filter the reader.
# NOTE(review): judging by the method name, only sites that received
# orientation data are kept -- confirm in the reader implementation.
params.ctcf_reader_orientOnly.set_sites_orientation(
    input_folder + "CTCF/wgEncodeAwgTfbsHaibK562CtcfcPcr1xUniPk.narrowPeak-orient.bed"
)
params.ctcf_reader_orientOnly.keep_only_with_orient_data()

# Set the corresponding predictor generators and their options.
# Note that ConvergentPairPG is built from params.ctcf_reader, not from the
# orientation-only reader used for OrientBlocksCTCFpg.
OrientBlocksCTCFpg = OrientBlocksPredictorGenerator(
    params.ctcf_reader_orientOnly, params.window_size)
ConvergentPairPG = ConvergentPairPredictorGenerator(
    params.ctcf_reader, binsize=params.window_size)

# Read RNA-Seq data.
# RNA-seq file format: this file should have fields
# "gene", "start", "end", "chr", "FPKM".
# You can rename table fields below.
params.RNAseqReader = RNAseqReader(
    fname=input_folder + "RNA-seq/rna-seqPolyA.tsvpre.txt", name="RNA")

# Read the RNA-seq table; `rename` maps the file's column headers (keys)
# to the field names listed on the right (values).
params.RNAseqReader.read_file(
    rename={
        "Gene name": "gene",
        "Gene start (bp)": "start",
        "Gene end (bp)": "end",
        "Chromosome/scaffold name": "chr",
        "FPKM": "sigVal",
    },
    sep="\t",
)

# Set the corresponding predictor generator and its options:
RNAseqPG = SmallChipSeqPredictorGenerator(
    params.RNAseqReader, window_size=params.window_size, N_closest=3)

# Write all predictor generators which you want to use:
# #print(row["name"]) # params.met_reader = ChiPSeqReader(input_folder + 'methylation/'+ row["filename"], name=row['name']) # params.met_reader.read_file(renamer={"0":"chr","1":"start","2":"end","4":"sigVal"}) # metPG.append(SmallChipSeqPredictorGenerator(params.met_reader,params.window_size,N_closest=4)) # #Read cage data # cagePG = [] # filemanes_df = pd.read_csv(input_folder + "cage/filenames.csv") # assert len(os.listdir(input_folder + 'peaks/')) - 1 == len(filenames_df['name']) # for index, row in filemanes_df.iterrows(): # #print(row["name"]) # params.cage_reader = ChiPSeqReader(input_folder + 'cage/' + row["filename"], name=row['name']) # params.cage_reader.read_file(renamer={"0":"chr","1":"start","2":"end","4":"sigVal"}) # cagePG.append(SmallChipSeqPredictorGenerator(params.cage_reader,params.window_size,N_closest=4)) # Read RNA-Seq data params.RNAseqReader = RNAseqReader( fname=input_folder + "RNA/GSE95111_genes.fpkm_table.txt.pre.txt", name="RNA") params.RNAseqReader.read_file(rename={ "Gene name": "gene", "Gene start (bp)": "start", "Gene end (bp)": "end", "Chromosome/scaffold name": "chr", "shCtrl-1_0": "sigVal" }, sep="\t") RNAseqPG = SmallChipSeqPredictorGenerator( params.RNAseqReader, window_size=params.window_size, N_closest=3) #Read E1 data params.eig_reader = E1Reader() params.eig_reader.read_files(
params.window_size, N_closest=4) ctcf_reader_orientOnly = ChiPSeqReader(CTCF_file, name="CTCF") ctcf_reader_orientOnly.read_file() ctcf_reader_orientOnly.set_sites_orientation(CTCF_orient_file) ctcf_reader_orientOnly.keep_only_with_orient_data() # set corresponding predictor generators and its options: OrientBlocksCTCFpg = OrientBlocksPredictorGenerator( ctcf_reader_orientOnly, params.window_size) ConvergentPairPG = ConvergentPairPredictorGenerator( params.ctcf_reader, binsize=params.window_size) #Read RNA-Seq data #RNA-seq_file format: this file should have fields "gene", "start", "end", "chr","FPKM" #you can rename table fields below params.RNAseqReader = RNAseqReader(RNA_seq_file, name="RNA") #read RNA-seq data and rename table fields params.RNAseqReader.read_file(rename={ "FPKM": "sigVal", "Gene start (bp)": "start", "Gene end (bp)": "end", "Chromosome/scaffold name": "chr", "Gene name": "gene" }, sep="\t") # set corresponding predictor generators and its options: RNAseqPG = SmallChipSeqPredictorGenerator(params.RNAseqReader, window_size=params.window_size, N_closest=3) params.pgs = [ OrientCtcfpg, NotOrientCTCFpg, OrientBlocksCTCFpg, RNAseqPG,
# (Disabled) methylation loader loop.
# # for index, row in filemanes_df.iterrows():
# #     #print(row["name"])
# #     params.met_reader = ChiPSeqReader(input_folder + 'methylation/'+ row["filename"], name=row['name'])
# #     params.met_reader.read_file(renamer={"0":"chr","1":"start","2":"end","4":"sigVal"})
# #     metPG.append(SmallChipSeqPredictorGenerator(params.met_reader,params.window_size,N_closest=4))

# (Disabled) CAGE loader.
# # #Read cage data
# # cagePG = []
# # filemanes_df = pd.read_csv(input_folder + "cage/filenames.csv")
# # # assert len(os.listdir(input_folder + 'cage/')) - 1 == len(filemanes_df['name'])
# # for index, row in filemanes_df.iterrows():
# #     #print(row["name"])
# #     params.cage_reader = ChiPSeqReader(input_folder+"cage/GSM849365_hg19_wgEncodeRikenCageK562CellPapClusters.bed.gz", name=row['name'])# + "cage/" + row["filename"], name=row['name'])
# #     params.cage_reader.read_file(renamer={"0":"chr","1":"start","2":"end","4":"sigVal"})
# #     cagePG.append(SmallChipSeqPredictorGenerator(params.cage_reader,params.window_size,N_closest=4))

# Read RNA-Seq data.
params.RNAseqReader = RNAseqReader(
    fname=input_folder + "RNA-seq/GSM2533845_NPC_rep1.txtpre.txt",
    name="RNA")

# `rename` maps the file's column headers (keys) to the field names on the
# right (values); this table spells its expression column "fpkm" (lowercase).
params.RNAseqReader.read_file(
    rename={
        "Gene name": "gene",
        "Gene start (bp)": "start",
        "Gene end (bp)": "end",
        "Chromosome/scaffold name": "chr",
        "fpkm": "sigVal",
    },
    sep="\t",
)
RNAseqPG = SmallChipSeqPredictorGenerator(
    params.RNAseqReader, window_size=params.window_size, N_closest=3)

# (Disabled) TSS distance predictor.
# #Read TSS data
# params.TssReader=TssReader(fname=input_folder + "TSS/NCBI_refSeq_hg19.bed", name="TSS")
# params.TssReader.read_file()
# TSSPG=Distance_to_TSS_PG(params.TssReader)
# logging.info('create chipPG') # chipPG = [] # filenames_df = pd.read_csv(input_folder + "H1/Chip-seq/filenames.csv") # # assert len(os.listdir(input_folder + 'peaks/')) - 1 == len(filenames_df['name']) # # print(len(os.listdir(input_folder + 'peaks/'))) # # print(len(filenames_df['name'])) # # proteins=set(["RAD21", "SMC3", "POLR2A", "H3K27ac", "H3K27me3", "DNase-seq", "H3K9me3", "H3K4me1", "H3K4me2", "H3K4me3", "YY1"]) # for index, row in filenames_df.iterrows(): # # if row["name"] in proteins: # params.chip_reader = ChiPSeqReader(input_folder + 'H1/Chip-seq/' + row["filename"], name=row['name']) # params.chip_reader.read_file() # chipPG.append(SmallChipSeqPredictorGenerator(params.chip_reader,params.window_size,N_closest=4)) #Read RNA-Seq data params.RNAseqReader = RNAseqReader( fname=input_folder + "mast_cells/RNA-seq/GSE75526_fpkm (1).pre.txt", name="RNA") params.RNAseqReader.read_file(rename={ "Gene name": "gene", "Gene start (bp)": "start", "Gene end (bp)": "end", "Chromosome/scaffold name": "chr", "FPKM": "sigVal" }, sep="\t") RNAseqPG = SmallChipSeqPredictorGenerator( params.RNAseqReader, window_size=params.window_size, N_closest=3) params.pgs = [ OrientCtcfpg, NotOrientCTCFpg, OrientBlocksCTCFpg, ConvergentPairPG, RNAseqPG