def read_dir(self, d):
    """Load interval and score data from directory ``d`` onto ``self``.

    If ``d`` does not exist as given, the first existing ``<p>/<d>`` for
    ``p`` in ``DATA_SEARCH_PATH`` is used instead.

    Directory entries are handled in two ways:

    * ``*.match`` files are read as text.  Lines starting with ``#`` and
      lines with fewer than four whitespace-separated fields are skipped;
      the first four fields of every other line are taken as
      ``chrom start end what`` and added as an ``intervals.Interval`` to
      the ``intervals.Intersecter`` kept for that ``chrom``.
    * Every other entry is wrapped in a ``FileBinnedArray`` keyed by the
      part of its name before the first ``.``.  Only the first entry seen
      for a given key is opened.

    Results are stored on the instance rather than returned:

    * ``self.lookup`` -- dict of ``chrom -> Intersecter``, or ``None`` when
      no interval was read.
    * ``self.scores`` -- dict of ``key -> FileBinnedArray``.

    Raises:
        OSError: if the directory cannot be listed or a file cannot be
            opened.
        ValueError: if a ``start``/``end`` field is not an integer.
    """
    if not os.path.exists(d):
        for p in DATA_SEARCH_PATH:
            candidate = os.path.join(p, d)
            if os.path.exists(candidate):
                d = candidate
                break

    lookup = {}
    scores_by_what = {}
    for fname in os.listdir(d):
        path = os.path.join(d, fname)
        if fname.endswith(".match"):
            # The match file is fully consumed here, so close it right away
            # instead of leaking one handle per file.
            with open(path) as handle:
                for line in handle:
                    if line.startswith("#"):
                        continue
                    fields = line.split()
                    if len(fields) < 4:
                        continue
                    chrom, start, end, what = fields[:4]
                    if chrom not in lookup:
                        lookup[chrom] = intervals.Intersecter()
                    lookup[chrom].add_interval(
                        intervals.Interval(int(start), int(end), what))
        else:
            k = fname.split('.')[0]
            if k not in scores_by_what:
                # The handle is handed to FileBinnedArray and deliberately
                # left open.  Binary mode matches how the other
                # FileBinnedArray inputs in this code base are opened.
                scores_by_what[k] = FileBinnedArray(open(path, "rb"))

    # An empty lookup is reported as None so callers can test for "no
    # intervals" explicitly.
    self.lookup = lookup or None
    self.scores = scores_by_what
Пример #2
0
    def __init__(self,
                 qualfiles=None,
                 qualspecies=None,
                 minqual=None,
                 mask="?",
                 cache=100):
        """Open the per-chromosome quality files for every species.

        Args:
            qualfiles: non-empty mapping of species -> path prefix.  For
                each chromosome the file ``<prefix>.<chrom>.bqv`` is opened
                in binary mode and wrapped in a ``FileBinnedArray``.
            qualspecies: non-empty mapping of species -> iterable of
                chromosome names; must contain every key of ``qualfiles``.
            minqual: minimum quality value, stored as ``self.minqual``.
                Must be given; ``0`` is accepted as an explicit threshold.
            mask: value stored as ``self.mask``.
            cache: cache budget.  It is doubled, stored as ``self.cache``
                and split evenly across the species in ``qualfiles``.

        Raises:
            Exception: if ``qualfiles`` or ``qualspecies`` is empty/missing
                or ``minqual`` is ``None``.
            KeyError: if a species in ``qualfiles`` is absent from
                ``qualspecies``.
            OSError: if a quality file cannot be opened.
        """
        if not qualfiles:
            raise Exception("No quality files.")
        if not qualspecies:
            raise Exception("No species dictionary.")
        # Compare against None so a threshold of 0 is not mistaken for
        # "not specified".
        if minqual is None:
            raise Exception("No minimum quality specified.")
        self.minqual = minqual
        self.mask = mask
        self.total = 0
        self.masked = 0

        self.qualfiles = qualfiles
        self.qualspecies = qualspecies
        self.cache = cache * 2  # typical bin size is 512K
        # Floor division keeps the per-species cache size an integer on
        # Python 3; keep at least one slot so a large species count never
        # yields a zero-sized cache.
        per_species_cache = max(1, self.cache // len(qualfiles))
        # load quality files into FileBinnedArray
        self.qualities = {}
        for species, qualfile in self.qualfiles.items():
            specdict = {}
            for chrom in self.qualspecies[species]:
                specdict[chrom] = FileBinnedArray(
                    open(qualfile + "." + chrom + ".bqv", "rb"),
                    cache=per_species_cache)
            self.qualities[species] = specdict
 def __getitem__(self, key):
     """Return the ``FileBinnedArray`` for ``key``, opening it on demand.

     A value already present in ``self.cache`` is returned as is.
     Otherwise ``<self.dir>/<key>.ba`` is opened in binary mode, wrapped
     in a ``FileBinnedArray``, stored in ``self.cache`` and returned.

     Raises:
         KeyError: if the value is not cached and the file does not
             exist (or the cached value is ``None``).
     """
     # Built up front so the error message below is always available,
     # including when a cache hit yields None.
     fname = os.path.join(self.dir, "%s.ba" % key)
     if key in self.cache:
         value = self.cache[key]
     elif os.path.exists(fname):
         # The handle stays open on purpose: it is owned by the
         # FileBinnedArray kept in the cache.
         value = FileBinnedArray(open(fname, "rb"))
         self.cache[key] = value
     else:
         value = None
     if value is None:
         raise KeyError("File does not exist: " + fname)
     return value