def load_objects(self, is_bedgraph, verbose=False):
    """Load every listed file and register the resulting object.

    Walks ``self.types`` in order. A "regions" entry is read into a
    GenomicRegionSet and a "genes" entry into a GeneSet; each object is
    stored in ``self.objectsDict`` under the matching entry of
    ``self.names``. Entries of any other type are skipped.

    *Keyword arguments:*

        - is_bedgraph -- If true, region files are read with read_bedgraph, otherwise with read_bed.
        - verbose -- If true, progress and "Cannot load objects" messages are printed to stderr (default = False).
    """
    for idx, kind in enumerate(self.types):
        if verbose:
            print("Loading file ", self.files[self.names[idx]], file=sys.stderr)
            # Unsupported types are only reported, never raised.
            if kind not in ("regions", "genes"):
                print("Cannot load objects", file=sys.stderr)

        if kind == "regions":
            label = self.names[idx]
            region_set = GenomicRegionSet(label)
            # The readers are always handed an absolute path.
            source = os.path.abspath(self.files[label])
            if is_bedgraph:
                region_set.read_bedgraph(source)
            else:
                region_set.read_bed(source)
            self.objectsDict[label] = region_set
        elif kind == "genes":
            label = self.names[idx]
            gene_set = GeneSet(label)
            # The reader is always handed an absolute path.
            gene_set.read(os.path.abspath(self.files[label]))
            self.objectsDict[label] = gene_set
def load_objects(self, is_bedgraph=False, verbose=False, test=False):
    """Load files and initialize objects.

    Walks ``self.types`` in order. A "regions" entry is read into a
    GenomicRegionSet and a "genes" entry into a GeneSet; each object is
    stored in ``self.objectsDict`` under the matching entry of
    ``self.names``. Entries of any other type are skipped.

    *Keyword arguments:*

        - is_bedgraph -- Whether regions are in bedgraph format (default = False).
        - verbose -- Verbose output on stderr (default = False).
        - test -- Keep only the first 10 regions from each BED file, for testing (default = False).
    """
    # Number of regions kept per BED file when ``test`` is set.
    max_test_regions = 10

    for i, t in enumerate(self.types):
        if verbose:
            print("Loading file ", self.files[self.names[i]], file=sys.stderr)
            # Unsupported types are only reported, never raised.
            if t not in ("regions", "genes"):
                print("Cannot load objects", file=sys.stderr)

        if t == "regions":
            name = self.names[i]
            regions = GenomicRegionSet(name)
            # Here change the relative path into absolute path
            file_path = os.path.abspath(self.files[name])
            if is_bedgraph:
                regions.read_bedgraph(file_path)
            else:
                regions.read_bed(file_path)
                if test:
                    # The previous slice [0:11] kept 11 regions, one more
                    # than documented.
                    regions.sequences = regions.sequences[:max_test_regions]
            self.objectsDict[name] = regions
        elif t == "genes":
            name = self.names[i]
            genes = GeneSet(name)
            # Here change the relative path into absolute path
            genes.read(os.path.abspath(self.files[name]))
            self.objectsDict[name] = genes
def load_objects(self, is_bedgraph=False, verbose=False, test=False):
    """Load files and initialize objects.

    Walks ``self.types`` in order. A "regions" entry is read into a
    GenomicRegionSet and a "genes" entry into a GeneSet; each object is
    stored in ``self.objectsDict`` under the matching entry of
    ``self.names``. Entries of any other type are skipped.

    *Keyword arguments:*

        - is_bedgraph -- Whether regions are in bedgraph format (default = False).
        - verbose -- Verbose output on stderr (default = False).
        - test -- Keep only the first 10 regions from each BED file, for testing (default = False).
    """
    # Number of regions kept per BED file when ``test`` is set.
    max_test_regions = 10

    for i, t in enumerate(self.types):
        if verbose:
            print("Loading file ", self.files[self.names[i]], file=sys.stderr)
            # Unsupported types are only reported, never raised.
            if t not in ("regions", "genes"):
                print("Cannot load objects", file=sys.stderr)

        if t == "regions":
            name = self.names[i]
            regions = GenomicRegionSet(name)
            # Here change the relative path into absolute path
            file_path = os.path.abspath(self.files[name])
            if is_bedgraph:
                regions.read_bedgraph(file_path)
            else:
                # Read straight into the stored set instead of a scratch
                # GenomicRegionSet whose sequences were then copied over.
                # NOTE(review): any other state read_bed sets on the object
                # is now kept in test mode too - confirm nothing relied on
                # it being absent.
                regions.read_bed(file_path)
                if test:
                    # The previous slice [0:11] kept 11 regions, one more
                    # than documented.
                    regions.sequences = regions.sequences[:max_test_regions]
            self.objectsDict[name] = regions
        elif t == "genes":
            name = self.names[i]
            genes = GeneSet(name)
            # Here change the relative path into absolute path
            genes.read(os.path.abspath(self.files[name]))
            self.objectsDict[name] = genes
# For the gene list in args.i, report how many names map to Ensembl IDs and
# how many promoters are retrieved, once with the HAVANA filter on and once
# with it off. Nothing is done when args.i is not an existing file.
args = parser.parse_args()

if os.path.isfile(args.i):
    print()
    for switch in (True, False):
        # Filter configuration used for this pass.
        print("HAVANA filter:\t\t", switch, sep="")
        print("protein_coding filter:\t", "True", sep="")
        print("known_only filter:\t", "True", sep="")
        ann = AnnotationSet(
            args.organism,
            alias_source=args.organism,
            filter_havana=switch,
            protein_coding=True,
            known_only=True,
        )

        genes = GeneSet("genes")
        genes.read(args.i)
        print("\tInput gene number: \t", len(genes), sep="")

        print("\tFixing the names into Ensembl ID:")
        de_ensembl, unmap_gs, ensembl2symbol = ann.fix_gene_names(
            gene_set=genes, output_dict=True, mute_warn=True
        )
        print("\t\tMapped:\t\t", len(de_ensembl), sep="")
        print("\t\tUnmapped:\t", len(unmap_gs), sep="")

        # The promoter lookup below runs on the mapped IDs.
        genes.genes = de_ensembl
        de_prom, unmapped_gene_list = ann.get_promoters(
            promoter_length=1000, gene_set=genes, unmaplist=True
        )
        print("\tGetting promoters:")
        print("\t\tMapped:\t\t", len(de_prom), sep="")
        print("\t\tUnmapped:\t", len(unmapped_gene_list), sep="")
outputdir = args[3]

# NOTE: experimental_matrix_file, gene_exp and annotation_path are used below
# but are not assigned in this section; they must be set earlier in the script.
exps = ExperimentalMatrix()
exps.read(experimental_matrix_file)
regionsets = exps.get_regionsets()

# Annotation files expected inside the annotation directory.
genome_file = annotation_path + "/chrom.sizes"
gene_file = annotation_path + "/association_file.bed"

genes = GeneSet("Expression")
genes.read_expression(gene_exp)

# One output text file is written per region set.
for region in regionsets:
    bedNew = GenomicRegionSet("")
    association = bedNew.filter_by_gene_association_old(
        region.fileName, genes.genes, gene_file, genome_file
    )
    degenes, de_peak_genes, mappedGenes, totalPeaks, regionsToGenes = association

    ct, labels = averageExpression(region, genes, regionsToGenes)

    # Output name: last "/"-separated component of the region file,
    # cut at its first ".".
    aux = region.fileName.split("/")
    fileName = aux[-1].split(".")
    target = "/".join([outputdir, fileName[0] + ".txt"])
    output(genes.cond, labels, ct, target)
annotation_path = args[2]
outputdir = args[3]

# NOTE: experimental_matrix_file, gene_exp and options are used below but are
# not assigned in this section; they must be set earlier in the script.
exps = ExperimentalMatrix()
exps.read(experimental_matrix_file)
regionsets = exps.get_regionsets()

# Annotation files expected inside the annotation directory.
genome_file = annotation_path + "/chrom.sizes"
gene_file = annotation_path + "/association_file.bed"

genes = GeneSet("Expression")
genes.read_expression(gene_exp)

# One output text file is written per region set.
for region in regionsets:
    bedNew = GenomicRegionSet("")
    association = bedNew.filter_by_gene_association_old(
        region.fileName,
        genes.genes,
        gene_file,
        genome_file,
        threshDist=options.dist,
    )
    degenes, de_peak_genes, mappedGenes, totalPeaks, regionsToGenes = association

    ct, labels = averageExpression(region, genes, regionsToGenes)

    # Output name: base name of the region file without its last extension.
    base_name = path.basename(region.fileName)
    fileName = path.splitext(base_name)[0]
    target = path.join(outputdir, fileName + ".txt")
    output(genes.cond, labels, ct, target)