def doWork(ref, chrm, truth, canavar):
    """Drive the whole run for a single chromosome.

    Builds the data structure with prepareDS(), loads chromosome ``chrm``
    from ``ref`` via loadChrm(), loads the ``truth`` and ``canavar`` call
    files via loadCalls(), then hands the result to
    computeDistCopyNumbers() and logs the memory usage.

    Args:
        ref: reference file, passed to loadChrm().
        chrm: chromosome name, passed to loadChrm() and loadCalls().
        truth: calls file loaded with index 1.
        canavar: calls file loaded with index 2.
    """
    ds = prepareDS()
    loadChrm(ds, ref, chrm)

    # Index 1 receives the truth calls, index 2 the canavar calls.
    for slot, calls_fn in ((1, truth), (2, canavar)):
        loadCalls(ds, calls_fn, slot, chrm)

    computeDistCopyNumbers(ds)
    mem_report = "MEM: %s" % drdcommon.memory_usage()
    log(mem_report)
    log('Done')
def __update_counts(self):
    """Record the counts for the current number of samples and report them.

    Computes the substitution, indel and gene counts with the
    ``__seen_in_more_than_n_samples*`` helpers (each given
    ``self.at_least_seen``), stores them as a 3-tuple in
    ``self.counts[self.n_samples]`` and writes a one-line summary,
    including the value of drdcommon.memory_usage(), to stderr.
    """
    tallies = (
        self.__seen_in_more_than_n_samples(self.subs, self.at_least_seen),
        self.__seen_in_more_than_n_samples(self.indels, self.at_least_seen),
        self.__seen_in_more_than_n_samples_for_genes(
            self.genes_partial, self.at_least_seen),
    )
    self.counts[self.n_samples] = tallies

    # Field order: sample count, the three tallies, memory, threshold.
    fields = (self.n_samples,) + tallies + (
        drdcommon.memory_usage(), self.at_least_seen)
    report = ">> #:%s SUBS:%s INDELS:%s GENES:%s MEM(Mbytes):%s AT_LEAST_SEEN:%s\n" % fields
    sys.stderr.write(report)
def __load_species_snp_coordinates(self):
    """Load the coordinates listed in ``self.coor_fn``.

    Every input line must contain exactly two whitespace-separated
    fields: a chromosome name and an integer coordinate.  The result is
    stored in ``self.d_species_coor`` as a nested dict of the form
    ``{chrm: {coordinate: 1}}``.  The number of lines read and the value
    of drdcommon.memory_usage() are logged at INFO level.

    Raises:
        ValueError: if a line does not split into exactly two fields or
            its second field is not an integer.
    """
    fd = drdcommon.xopen(self.coor_fn)
    d = {}
    self.d_species_coor = d
    n = 0
    try:
        for l in fd:
            n += 1
            chrm, coor = l.split()
            # ``in`` works on Python 2 and 3; dict.has_key() is gone in 3.
            if chrm not in d:
                d[chrm] = {}
            d[chrm][int(coor)] = 1
    finally:
        # Release the handle even when a malformed line aborts the load.
        fd.close()
    logging.info("# of coordinates loaded: %d" % n)
    logging.info("current memory usage in %dkb" % drdcommon.memory_usage())
def loadCalls(ds, fn, idx, chrm):
    """Load the calls for chromosome ``chrm`` from ``fn`` into ``ds[idx]``.

    Every line of ``fn`` must hold four whitespace-separated fields:
    chromosome, start, end and copy number.  For each line whose
    chromosome equals ``chrm``, every position from start to end
    (both inclusive) in ``ds[idx]`` is set to the rounded copy number.
    Progress is written to stderr each time the running base count is a
    multiple of one million.

    Args:
        ds: indexable container; ``ds[idx]`` must support item assignment.
        fn: path of the calls file, opened with drdcommon.xopen().
        idx: index of the slot in ``ds`` that receives the calls.
        chrm: chromosome name to select.

    Raises:
        ValueError: if a line does not have exactly four fields or a
            numeric field cannot be converted.
    """
    log("Loading calls from %s; idx=%s" % (fn, idx))
    chrm_found = False
    nbp = 0
    fd = drdcommon.xopen(fn)
    try:
        for l in fd:
            c, start, end, cnv = l.strip().split()
            if c != chrm:
                continue
            chrm_found = True
            # The copy number is the same for the whole interval, so it
            # is converted once instead of once per base.
            copy_number = round(float(cnv))
            for i in range(int(start), int(end) + 1):
                if nbp % 1000000 == 0:
                    sys.stderr.write("MEM: %s nbp: %s\r" % (drdcommon.memory_usage(), nbp))
                ds[idx][i] = copy_number
                nbp += 1
    finally:
        # Do not leave the input handle open, even on a parse error.
        fd.close()
    if not chrm_found:
        error("\nCould not find chrm in file. Bailing out.")
    log("\n%s bp loaded" % nbp)
def loadCalls(ds, fn, idx, chrm):
    """Load the calls for chromosome ``chrm`` from ``fn`` into ``ds[idx]``.

    Every line of ``fn`` must hold four whitespace-separated fields:
    chromosome, start, end and copy number.  For each line whose
    chromosome equals ``chrm``, every position from start to end
    (both inclusive) in ``ds[idx]`` is set to the rounded copy number.
    Progress is written to stderr each time the running base count is a
    multiple of one million.

    Args:
        ds: indexable container; ``ds[idx]`` must support item assignment.
        fn: path of the calls file, opened with drdcommon.xopen().
        idx: index of the slot in ``ds`` that receives the calls.
        chrm: chromosome name to select.

    Raises:
        ValueError: if a line does not have exactly four fields or a
            numeric field cannot be converted.
    """
    log("Loading calls from %s; idx=%s" % (fn, idx))
    chrm_found = False
    nbp = 0
    fd = drdcommon.xopen(fn)
    try:
        for l in fd:
            c, start, end, cnv = l.strip().split()
            if c != chrm:
                continue
            chrm_found = True
            # The copy number is the same for the whole interval, so it
            # is converted once instead of once per base.
            copy_number = round(float(cnv))
            for i in range(int(start), int(end) + 1):
                if nbp % 1000000 == 0:
                    sys.stderr.write("MEM: %s nbp: %s\r" % (drdcommon.memory_usage(), nbp))
                ds[idx][i] = copy_number
                nbp += 1
    finally:
        # Do not leave the input handle open, even on a parse error.
        fd.close()
    if not chrm_found:
        error("\nCould not find chrm in file. Bailing out.")
    log("\n%s bp loaded" % nbp)
def loadChrm(ds, ref, chrm):
    """Mark the non-N positions of chromosome ``chrm`` in ``ds[0]``.

    Reads ``ref`` line by line.  Lines starting with ``>`` are record
    headers; the header text after ``>`` must equal ``chrm`` exactly.
    Records in front of the requested one are skipped, and reading stops
    at the first header that follows sequence data.  A file that starts
    with sequence data and no header is read from its first line.  For
    every base that is not ``N``/``n`` the 1-based position ``i`` gets
    ``ds[0][i] = 1``.  Blank lines are ignored.

    Args:
        ds: indexable container; ``ds[0]`` must support item assignment.
        ref: path of the reference file, opened with drdcommon.xopen().
        chrm: name of the chromosome to load.
    """
    if not os.path.exists(ref):
        # Interpolate the path here so error() receives one ready-made
        # message instead of a format string plus a stray argument.
        error("Cannot find reference file: %s" % ref)
    log("Reading reference genome chrm: %s" % chrm)
    i = 1  # 1-based position of the next base to be read
    skipping = False  # True while inside a record other than chrm
    fd = drdcommon.xopen(ref)
    try:
        for l in fd:
            l = l.strip()
            if not l:
                # Blank lines carry no bases and would break l[0].
                continue
            if l[0] == '>':
                if i > 1:
                    # A new record starts after bases were read: done.
                    break
                # No bases read yet: a header is never sequence data, so
                # decide here whether the record that follows is wanted.
                skipping = l[1:] != chrm
                continue
            if skipping:
                continue
            for bp in l:
                if bp.upper() != 'N':
                    ds[0][i] = 1
                if i % 10000000 == 0:
                    sys.stderr.write("MEM: %s nbp: %s\r" % (drdcommon.memory_usage(), i))
                i += 1
    finally:
        fd.close()
    # i points one past the last base, so the number read is i - 1.
    log("\n%s bp read." % (i - 1))