示例#1
0
    def inverseAssoc(self, gene):
        """Yield every term ``t`` for which ``self[t]`` contains ``gene``.

        The gene name is normalised with ``Gene.canonicalName`` before the
        membership checks. Terms are produced lazily, in the iteration order
        of ``self.associations``.
        """
        wanted = Gene.canonicalName(gene)
        for candidate in self.associations:
            # Guard clause: skip terms that do not list the gene.
            if wanted not in self[candidate]:
                continue
            yield candidate
示例#2
0
    def fromFile(cls, inputFileName, taxons=None, dataset=None):
        """Build an instance from a gene association file.

        The input format is chosen from the file name:

        * Names ending in ``.pickle`` or ``.pickle_reserved`` are unpickled
          and must hold an ``(associations, alltaxons)`` pair. ``taxons`` and
          ``dataset`` are not applied as filters to this data.
        * Anything else is parsed as UTF-8, tab-separated text. Lines that
          start with ``!`` are comments; column 3 is the gene name, column 5
          the term and column 13 a ``|``-separated list of ``prefix:<int>``
          taxon identifiers. Plain ``open`` is used, so compressed files are
          not transparently decompressed.

        Args:
            inputFileName: Path of the file to read.
            taxons: Optional set of integer taxon ids. When given, only
                records sharing at least one taxon with it are kept.
            dataset: Optional sized container of canonical gene names. When
                given, only records whose gene is in it are kept.

        Returns:
            ``cls(associations, alltaxons, dataset)`` where ``associations``
            maps each term to a set of canonical gene names and ``alltaxons``
            holds every taxon id seen in the file (before filtering).

        Raises:
            IndexError: If a non-blank, non-comment line has fewer than 13
                columns or a taxon entry without a ``:``.
            ValueError: If a taxon id is not an integer.
        """
        debug("Reading gene associations file %s...%s" %
              (inputFileName,
               ("" if dataset is None else " Dataset size is %d." %
                len(dataset))))

        if inputFileName.endswith(('.pickle', '.pickle_reserved')):
            # Serialized data = much faster.
            # SECURITY: unpickling can execute arbitrary code; only load
            # pickle files that come from a trusted source.
            with open(inputFileName, 'rb') as f:
                associations, alltaxons = pickle.load(f)
        else:
            import io  # local import: only the text path needs it

            associations = defaultdict(set)
            alltaxons = set()

            # Stream the file instead of read().decode().splitlines():
            # str.splitlines() also breaks on characters such as U+2028 or
            # \x0b inside a field, which would corrupt the record, and it
            # keeps several full copies of the file in memory.
            with io.open(inputFileName, encoding='utf8') as associationFile:
                for line in associationFile:
                    line = line.rstrip('\n')
                    # Blank lines carry no record; comments start with '!'.
                    if not line.strip() or line.startswith('!'):
                        continue
                    fields = line.split('\t')
                    taxon = {int(x.split(':')[1])
                             for x in fields[12].split('|')}
                    # Every taxon is recorded, even for filtered-out rows.
                    alltaxons.update(taxon)
                    gene = Gene.canonicalName(fields[2])
                    term = fields[4]
                    if (taxons is None or taxons.intersection(taxon)) and \
                       (dataset is None or gene in dataset):
                        associations[term].add(gene)
        debug("Finished reading gene associations file %s... " % inputFileName)
        return cls(associations, alltaxons, dataset)
示例#3
0
    def fromFile(cls, inputFileName, taxons=None, dataset=None):
        """Read gene/term associations from a file and construct ``cls``.

        File type is decided by the file name suffix:

        * ``.pickle`` / ``.pickle_reserved``: the file is unpickled and is
          expected to contain an ``(associations, alltaxons)`` tuple. The
          ``taxons`` and ``dataset`` filters are not applied in this case.
        * any other name: the file is read as UTF-8 text with tab-separated
          columns. Lines beginning with ``!`` are skipped as comments. The
          gene name is taken from column 3, the term from column 5 and the
          taxon ids from column 13 (``|``-separated ``prefix:<int>`` items).
          The file is opened as plain text; no decompression is attempted.

        Args:
            inputFileName: Path of the association file.
            taxons: Optional set of integer taxon ids used as a filter; a
                record is kept when it shares at least one id with this set.
            dataset: Optional sized container of canonical gene names used
                as a filter; a record is kept when its gene is a member.

        Returns:
            The result of ``cls(associations, alltaxons, dataset)``.
            ``associations`` maps term -> set of canonical gene names that
            passed the filters; ``alltaxons`` is the set of all taxon ids
            encountered, regardless of the filters.

        Raises:
            IndexError: For a data line with fewer than 13 columns or a
                taxon item lacking ``:``.
            ValueError: For a taxon id that cannot be converted to ``int``.
        """
        if dataset is None:
            sizeNote = ""
        else:
            sizeNote = " Dataset size is %d." % len(dataset)
        debug("Reading gene associations file %s...%s" % (inputFileName, sizeNote))

        if inputFileName.endswith(('.pickle', '.pickle_reserved')):
            # Serialized data = much faster.
            # SECURITY: pickle.load may run arbitrary code from the file, so
            # this path must only be used with trusted input.
            with open(inputFileName, 'rb') as f:
                associations, alltaxons = pickle.load(f)
        else:
            import io  # function-scope import; used only by the text parser

            associations = defaultdict(set)
            alltaxons = set()

            # Iterate the decoded file line by line. The previous
            # read().decode().splitlines() approach split records on extra
            # Unicode line boundaries (e.g. U+0085, U+2028, \x0c) appearing
            # inside fields and needed the whole file in memory at once.
            with io.open(inputFileName, encoding='utf8') as associationFile:
                for rawLine in associationFile:
                    record = rawLine.rstrip('\n')
                    if not record.strip():
                        # Blank line: nothing to parse (used to IndexError).
                        continue
                    if record.startswith('!'):
                        continue
                    columns = record.split('\t')
                    taxon = {int(x.split(':')[1]) for x in columns[12].split('|')}
                    # Collected before filtering on purpose.
                    alltaxons.update(taxon)
                    gene = Gene.canonicalName(columns[2])
                    term = columns[4]
                    if (taxons is None or taxons.intersection(taxon)) and \
                       (dataset is None or gene in dataset):
                        associations[term].add(gene)
        debug("Finished reading gene associations file %s... " % inputFileName)
        return cls(associations, alltaxons, dataset)
示例#4
0
    def inverseAssoc(self, gene):
        """Generate the terms associated with ``gene``.

        ``gene`` is first converted with ``Gene.canonicalName``. Each key of
        ``self.associations`` is then looked up through ``self[...]`` and
        yielded when the canonical name is a member of the result.
        """
        canonical = Gene.canonicalName(gene)
        for term in self.associations:
            members = self[term]
            if canonical in members:
                yield term