Пример #1
0
    def load_objects(self, is_bedgraph, verbose=False):
        """Build one container object per matrix entry and fill it from its file.

        Entries typed "regions" are loaded into a GenomicRegionSet and entries
        typed "genes" into a GeneSet. Every loaded object is stored in
        ``self.objectsDict`` under the entry's name. Entries of any other type
        are skipped (and reported on stderr when ``verbose`` is set).

        *Keyword arguments:*

            - is_bedgraph -- If true, region files are parsed with read_bedgraph, otherwise with read_bed.
            - verbose -- Report progress on stderr (default = False).
        """
        for idx, kind in enumerate(self.types):
            if verbose:
                print("Loading file ", self.files[self.names[idx]],
                      file=sys.stderr)
                if kind not in ("regions", "genes"):
                    print("Cannot load objects", file=sys.stderr)

            if kind == "regions":
                label = self.names[idx]
                region_set = GenomicRegionSet(label)
                # Pick the parser that matches the declared file format.
                reader = (region_set.read_bedgraph if is_bedgraph
                          else region_set.read_bed)
                # The file path is made absolute before it is parsed.
                reader(os.path.abspath(self.files[label]))
                self.objectsDict[label] = region_set

            elif kind == "genes":
                label = self.names[idx]
                gene_set = GeneSet(label)
                # The file path is made absolute before it is parsed.
                gene_set.read(os.path.abspath(self.files[label]))
                self.objectsDict[label] = gene_set
Пример #2
0
    def load_objects(self, is_bedgraph, verbose=False, test=False):
        """Load files and initialize objects.

        Entries of ``self.types`` typed "regions" are loaded into a
        GenomicRegionSet and entries typed "genes" into a GeneSet. Each
        loaded object is stored in ``self.objectsDict`` under the entry's
        name. Entries of any other type are skipped.

        *Keyword arguments:*

            - is_bedgraph -- Whether regions are in bedgraph format.
            - verbose -- Verbose output on stderr (default = False).
            - test -- Keep only the first 10 regions of each BED file, for testing (default = False). Not applied to bedgraph files.
        """
        for i, t in enumerate(self.types):
            if verbose:
                print("Loading file ",
                      self.files[self.names[i]],
                      file=sys.stderr)

            if t not in ["regions", "genes"] and verbose:
                print("Cannot load objects", file=sys.stderr)

            if t == "regions":
                regions = GenomicRegionSet(self.names[i])
                if is_bedgraph:
                    regions.read_bedgraph(
                        os.path.abspath(self.files[self.names[i]]))
                else:
                    regions.read_bed(os.path.abspath(
                        self.files[self.names[i]]))
                    if test:
                        # Keep exactly 10 regions, as documented; the former
                        # slice [0:11] kept 11.
                        regions.sequences = regions.sequences[:10]
                self.objectsDict[self.names[i]] = regions

            elif t == "genes":
                genes = GeneSet(self.names[i])
                genes.read(
                    os.path.abspath(self.files[self.names[i]])
                )  # Here change the relative path into absolute path
                self.objectsDict[self.names[i]] = genes
Пример #3
0
    def load_objects(self, is_bedgraph, verbose=False, test=False):
        """Load files and initialize objects.

        Entries of ``self.types`` typed "regions" are loaded into a
        GenomicRegionSet and entries typed "genes" into a GeneSet. Each
        loaded object is stored in ``self.objectsDict`` under the entry's
        name. Entries of any other type are skipped.

        *Keyword arguments:*

            - is_bedgraph -- Whether regions are in bedgraph format.
            - verbose -- Verbose output on stderr (default = False).
            - test -- Keep only the first 10 regions of each BED file, for testing (default = False). Not applied to bedgraph files.
        """
        for i, t in enumerate(self.types):
            if verbose:
                print("Loading file ", self.files[self.names[i]], file=sys.stderr)

            if t not in ["regions", "genes"] and verbose:
                print("Cannot load objects", file=sys.stderr)

            if t == "regions":
                regions = GenomicRegionSet(self.names[i])
                if is_bedgraph:
                    regions.read_bedgraph(os.path.abspath(self.files[self.names[i]]))

                else:
                    if test:
                        # Read the whole file into a scratch set and copy over
                        # only the leading regions.
                        g = GenomicRegionSet(self.names[i])
                        g.read_bed(os.path.abspath(self.files[self.names[i]]))
                        # Keep exactly 10 regions, as documented; the former
                        # slice [0:11] kept 11.
                        regions.sequences = g.sequences[:10]
                    else:
                        # Here change the relative path into absolute path
                        regions.read_bed(os.path.abspath(self.files[self.names[i]]))
                self.objectsDict[self.names[i]] = regions

            elif t == "genes":
                genes = GeneSet(self.names[i])
                # Here change the relative path into absolute path
                genes.read(os.path.abspath(self.files[self.names[i]]))
                self.objectsDict[self.names[i]] = genes
Пример #4
0
args = parser.parse_args()


def _show(label, value):
    """Print ``label`` immediately followed by the string form of ``value``."""
    print(label + str(value))


# Nothing is reported unless the input path names an existing file.
if os.path.isfile(args.i):
    print()

    # Same report twice: HAVANA filter switched on, then off.
    for switch in (True, False):
        _show("HAVANA filter:\t\t", switch)
        _show("protein_coding filter:\t", "True")
        _show("known_only filter:\t", "True")
        ann = AnnotationSet(
            args.organism,
            alias_source=args.organism,
            filter_havana=switch,
            protein_coding=True,
            known_only=True,
        )

        # A fresh GeneSet is read on every pass; genes.genes is replaced below.
        genes = GeneSet("genes")
        genes.read(args.i)
        _show("\tInput gene number: \t", len(genes))
        print("\tFixing the names into Ensembl ID:")
        de_ensembl, unmap_gs, ensembl2symbol = ann.fix_gene_names(
            gene_set=genes,
            output_dict=True,
            mute_warn=True,
        )
        _show("\t\tMapped:\t\t", len(de_ensembl))
        _show("\t\tUnmapped:\t", len(unmap_gs))

        # The promoter lookup runs on the names returned by fix_gene_names.
        genes.genes = de_ensembl

        de_prom, unmapped_gene_list = ann.get_promoters(
            promoter_length=1000,
            gene_set=genes,
            unmaplist=True,
        )
        print("\tGetting promoters:")
        _show("\t\tMapped:\t\t", len(de_prom))
        _show("\t\tUnmapped:\t", len(unmapped_gene_list))
Пример #5
0
    # Fourth positional argument: directory that receives the per-region-set
    # output files.
    outputdir = args[3]
    
    
    # Developer-local example inputs, left commented out:
#     experimental_matrix_file = "/home/manuel/workspace/cluster_p/THOR/exp/exp23_macs2_payton/1"
#     gene_exp = "/home/manuel/workspace/cluster_p/allhoff/project_THOR/data/payton/gene_expression/CCmean.data"
#     annotation_path = "/home/manuel/workspace/cluster_h/rgtdata/hg19/"
#     outputdir = "/home/manuel/test/"
    
    # Read the experimental matrix and take its region sets.
    exps = ExperimentalMatrix()
    exps.read(experimental_matrix_file)
    regionsets = exps.get_regionsets()
    
    # Both annotation files are looked up directly under annotation_path.
    genome_file = annotation_path + "/chrom.sizes"
    gene_file = annotation_path + "/association_file.bed"
    
    # NOTE(review): presumably gene_exp is a gene-expression table; the loop
    # below reads genes.genes and genes.cond after this call -- confirm
    # against GeneSet.read_expression.
    genes = GeneSet("Expression")
    genes.read_expression(gene_exp)
    
    # One output file per region set.
    for region in regionsets:
        # A fresh, unnamed set is used only to call the association method on
        # the region set's source file.
        bedNew = GenomicRegionSet("")
        # Only regionsToGenes is used further down in this loop.
        [degenes, de_peak_genes, mappedGenes, totalPeaks, regionsToGenes] \
        = bedNew.filter_by_gene_association_old(region.fileName, genes.genes, gene_file, genome_file)
        
        [ct, labels] = averageExpression(region, genes, regionsToGenes)
        # Output name = last "/"-separated component of the source path, cut
        # at its FIRST dot (so "a.b.bed" yields "a"), plus ".txt".
        aux = region.fileName.split("/")
        fileName = aux[-1]
        fileName = fileName.split(".")
        output(genes.cond, labels, ct, outputdir + "/" + fileName[0] + ".txt")
        
        
Пример #6
0
    annotation_path = args[2]
    outputdir = args[3]
    
    
#     experimental_matrix_file = "/home/manuel/workspace/cluster_p/THOR/exp/exp23_macs2_payton/1"
#     gene_exp = "/home/manuel/workspace/cluster_p/allhoff/project_THOR/data/payton/gene_expression/CCmean.data"
#     annotation_path = "/home/manuel/workspace/cluster_h/rgtdata/hg19/"
#     outputdir = "/home/manuel/test/"
    
    exps = ExperimentalMatrix()
    exps.read(experimental_matrix_file)
    regionsets = exps.get_regionsets()
    
    genome_file = annotation_path + "/chrom.sizes"
    gene_file = annotation_path + "/association_file.bed"
    
    genes = GeneSet("Expression")
    genes.read_expression(gene_exp)
    
    for region in regionsets:
        bedNew = GenomicRegionSet("")
        [degenes, de_peak_genes, mappedGenes, totalPeaks, regionsToGenes] \
        = bedNew.filter_by_gene_association_old(region.fileName, genes.genes, gene_file, genome_file, threshDist=options.dist)
        
        [ct, labels] = averageExpression(region, genes, regionsToGenes)
        fileName = path.splitext(path.basename(region.fileName))[0]
	output(genes.cond, labels, ct, path.join(outputdir, fileName + ".txt"))
        
        

Пример #7
0
args = parser.parse_args()


def _report(prefix, value):
    """Print ``prefix`` directly followed by ``str(value)`` on one line."""
    print(prefix + str(value))


# The whole report is skipped when the input path is not an existing file.
if os.path.isfile(args.i):
    print()

    # First pass with the HAVANA filter enabled, second pass with it disabled.
    for switch in (True, False):
        _report("HAVANA filter:\t\t", switch)
        _report("protein_coding filter:\t", "True")
        _report("known_only filter:\t", "True")
        ann = AnnotationSet(
            args.organism,
            alias_source=args.organism,
            filter_havana=switch,
            protein_coding=True,
            known_only=True,
        )

        # The input is read anew on each pass; genes.genes is replaced below.
        genes = GeneSet("genes")
        genes.read(args.i)
        _report("\tInput gene number: \t", len(genes))
        print("\tFixing the names into Ensembl ID:")
        de_ensembl, unmap_gs, ensembl2symbol = ann.fix_gene_names(
            gene_set=genes,
            output_dict=True,
            mute_warn=True,
        )
        _report("\t\tMapped:\t\t", len(de_ensembl))
        _report("\t\tUnmapped:\t", len(unmap_gs))

        # Promoters are looked up for the names returned by fix_gene_names.
        genes.genes = de_ensembl

        de_prom, unmapped_gene_list = ann.get_promoters(
            promoter_length=1000,
            gene_set=genes,
            unmaplist=True,
        )
        print("\tGetting promoters:")
        _report("\t\tMapped:\t\t", len(de_prom))
        _report("\t\tUnmapped:\t", len(unmapped_gene_list))