logging.info('create metPG')

# Read methylation data: build one reader and one predictor generator per row
# of the methylation manifest (columns used: "filename" and "name").
metPG = []
filemanes_df = pd.read_csv(input_folder + "methylation/filenames.csv")
# Disabled sanity check. NOTE(review): it inspects 'peaks/' rather than
# 'methylation/' and spells the frame 'filenames_df', so it needs fixing
# before it can be re-enabled.
# assert len(os.listdir(input_folder + 'peaks/')) - 1 == len(filenames_df['name'])
for index, row in filemanes_df.iterrows():
    # params.met_reader is overwritten on every pass; after the loop it refers
    # to the reader built from the last manifest row.
    params.met_reader = ChiPSeqReader(input_folder + 'methylation/' + row["filename"],
                                      name=row['name'])
    # Column map handed to the reader (presumably positional column labels ->
    # the names the predictor generators expect; confirm in ChiPSeqReader).
    params.met_reader.read_file(renamer={"0": "chr", "1": "start", "2": "end", "4": "sigVal"})
    metPG.append(SmallChipSeqPredictorGenerator(params.met_reader,
                                                params.window_size,
                                                N_closest=4))

# Read cage data: same scheme as above, driven by the cage manifest.
cagePG = []
filemanes_df = pd.read_csv(input_folder + "cage/filenames.csv")
# assert len(os.listdir(input_folder + 'cage/')) - 1 == len(filemanes_df['name'])
for index, row in filemanes_df.iterrows():
    # Load the file named in this manifest row. Using a single fixed BED file
    # here would make every reader (each labelled with a different row name)
    # load identical data.
    params.cage_reader = ChiPSeqReader(input_folder + "cage/" + row["filename"],
                                       name=row['name'])
    params.cage_reader.read_file(renamer={"0": "chr", "1": "start", "2": "end", "4": "sigVal"})
    cagePG.append(SmallChipSeqPredictorGenerator(params.cage_reader,
                                                 params.window_size,
                                                 N_closest=4))

# Read RNA-Seq data: a single tab-separated table whose columns are mapped to
# the gene/start/end/chr/sigVal names.
params.RNAseqReader = RNAseqReader(fname=input_folder + "RNA-seq/rna-seqPolyA.tsvpre.txt",
                                   name="RNA")
# NOTE(review): this call passes `rename=` while the ChiPSeqReader calls above
# pass `renamer=`; confirm this matches RNAseqReader.read_file's signature.
params.RNAseqReader.read_file(rename={"Gene name": "gene",
                                      "Gene start (bp)": "start",
                                      "Gene end (bp)": "end",
                                      "Chromosome/scaffold name": "chr",
                                      "FPKM": "sigVal"},
                              sep="\t")
RNAseqPG = SmallChipSeqPredictorGenerator(params.RNAseqReader,
                                          window_size=params.window_size,
                                          N_closest=3)

# #Read TSS data
# Methylation predictors are disabled here; the original code is kept below
# as comments for reference.
# metPG = []
# filemanes_df = pd.read_csv(input_folder + "methylation/filenames.csv")
# assert len(os.listdir(input_folder + 'peaks/')) - 1 == len(filenames_df['name'])
# for index, row in filemanes_df.iterrows():
#     #print(row["name"])
#     params.met_reader = ChiPSeqReader(input_folder + 'methylation/'+ row["filename"], name=row['name'])
#     params.met_reader.read_file(renamer={"0":"chr","1":"start","2":"end","4":"sigVal"})
#     metPG.append(SmallChipSeqPredictorGenerator(params.met_reader,params.window_size,N_closest=4))

# Read cage data: one reader and one predictor generator per manifest row.
# cagePG is rebuilt from an empty list at this point.
cage_dir = input_folder + "cage/"
cage_column_names = (("0", "chr"), ("1", "start"), ("2", "end"), ("4", "sigVal"))

cagePG = []
filemanes_df = pd.read_csv(cage_dir + "filenames.csv")
# assert len(os.listdir(input_folder + 'cage/')) - 1 == len(filemanes_df['name'])
for _, entry in filemanes_df.iterrows():
    #print(entry["name"])
    cage_reader = ChiPSeqReader(cage_dir + entry["filename"], name=entry['name'])
    # Publish the reader on params before reading; params.cage_reader ends up
    # pointing at the reader for the last manifest row.
    params.cage_reader = cage_reader
    # A fresh dict is built on each pass, so no column map is shared between
    # readers.
    cage_reader.read_file(renamer=dict(cage_column_names))
    generator = SmallChipSeqPredictorGenerator(cage_reader,
                                               params.window_size,
                                               N_closest=4)
    cagePG.append(generator)

# Read RNA-Seq data: only the reader is constructed in this section.
rna_seq_path = input_folder + "RNA-seq/rna-seqPolyA.tsvpre.txt"
params.RNAseqReader = RNAseqReader(fname=rna_seq_path, name="RNA")