示例#1
0
def doWork(ref, chrm, truth, canavar):
    """Run the whole pipeline for a single chromosome.

    Creates a fresh data structure via ``prepareDS``, loads chromosome
    ``chrm`` of the reference ``ref`` into it, loads the ``truth`` call
    file under index 1 and the ``canavar`` call file under index 2, hands
    the structure to ``computeDistCopyNumbers`` and finally logs the
    current memory usage followed by 'Done'.
    """
    store = prepareDS()
    loadChrm(store, ref, chrm)
    # Counting from 1 gives index 1 to the truth calls and 2 to canavar.
    for slot, calls_fn in enumerate((truth, canavar), 1):
        loadCalls(store, calls_fn, slot, chrm)
    computeDistCopyNumbers(store)
    mem_line = "MEM: %s" % drdcommon.memory_usage()
    log(mem_line)
    log('Done')
示例#2
0
def doWork(ref, chrm, truth, canavar):
	"""Run the whole pipeline for a single chromosome.

	Creates a fresh data structure via ``prepareDS``, loads chromosome
	``chrm`` of the reference ``ref`` into it, loads the ``truth`` call
	file under index 1 and the ``canavar`` call file under index 2, hands
	the structure to ``computeDistCopyNumbers`` and finally logs the
	current memory usage followed by 'Done'.
	"""
	store = prepareDS()
	loadChrm(store, ref, chrm)
	# Counting from 1 gives index 1 to the truth calls and 2 to canavar.
	for slot, calls_fn in enumerate((truth, canavar), 1):
		loadCalls(store, calls_fn, slot, chrm)
	computeDistCopyNumbers(store)
	mem_line = "MEM: %s" % drdcommon.memory_usage()
	log(mem_line)
	log('Done')
示例#3
0
 def __update_counts(self):
     """Store the current totals under ``self.n_samples`` and report them.

     Three totals are computed with the current ``self.at_least_seen``
     threshold: one for ``self.subs``, one for ``self.indels`` and one
     for ``self.genes_partial``.  They are saved as a tuple in
     ``self.counts[self.n_samples]`` and a one-line summary, including
     the value of ``drdcommon.memory_usage()``, is written to stderr.
     """
     subs_total = self.__seen_in_more_than_n_samples(
         self.subs, self.at_least_seen)
     indels_total = self.__seen_in_more_than_n_samples(
         self.indels, self.at_least_seen)
     genes_total = self.__seen_in_more_than_n_samples_for_genes(
         self.genes_partial, self.at_least_seen)

     totals = (subs_total, indels_total, genes_total)
     self.counts[self.n_samples] = totals

     # Field order must follow the placeholders in the format string.
     fields = (self.n_samples,) + totals + (drdcommon.memory_usage(),
                                            self.at_least_seen)
     report = ">> #:%s SUBS:%s INDELS:%s GENES:%s MEM(Mbytes):%s AT_LEAST_SEEN:%s\n" % fields
     sys.stderr.write(report)
示例#4
0
 def __load_species_snp_coordinates(self):
     """Load the coordinate file ``self.coor_fn`` into ``self.d_species_coor``.

     Every line of the file must hold exactly two whitespace-separated
     fields: a chromosome name and an integer coordinate.  The result is
     a two-level dict of the form ``{chrm: {coordinate: 1}}``.  The number
     of lines read and the value of ``drdcommon.memory_usage()`` are
     logged at INFO level once loading is complete.

     Raises:
         ValueError: if a line does not split into exactly two fields or
             its coordinate is not an integer.
     """
     fd = drdcommon.xopen(self.coor_fn)
     d = {}
     self.d_species_coor = d
     n = 0
     try:
         for l in fd:
             n += 1
             chrm, coor = l.split()
             # ``in`` works on Python 2 and 3; dict.has_key() was removed
             # in Python 3.
             if chrm not in d:
                 d[chrm] = {}
             d[chrm][int(coor)] = 1
     finally:
         # Release the handle even when a malformed line aborts the load.
         fd.close()
     logging.info("# of coordinates loaded: %d" % n)
     logging.info("current memory usage in %dkb" % drdcommon.memory_usage())
示例#5
0
 def __load_species_snp_coordinates(self):
   """Load the coordinate file ``self.coor_fn`` into ``self.d_species_coor``.

   Every line of the file must hold exactly two whitespace-separated
   fields: a chromosome name and an integer coordinate.  The result is
   a two-level dict of the form ``{chrm: {coordinate: 1}}``.  The number
   of lines read and the value of ``drdcommon.memory_usage()`` are
   logged at INFO level once loading is complete.

   Raises:
     ValueError: if a line does not split into exactly two fields or
       its coordinate is not an integer.
   """
   fd = drdcommon.xopen(self.coor_fn)
   d = {}
   self.d_species_coor = d
   n = 0
   try:
     for l in fd:
       n += 1
       chrm, coor = l.split()
       # ``in`` works on Python 2 and 3; dict.has_key() was removed
       # in Python 3.
       if chrm not in d:
         d[chrm] = {}
       d[chrm][int(coor)] = 1
   finally:
     # Release the handle even when a malformed line aborts the load.
     fd.close()
   logging.info("# of coordinates loaded: %d" % n)
   logging.info("current memory usage in %dkb" % drdcommon.memory_usage())
示例#6
0
def loadCalls(ds, fn, idx, chrm):
	"""Load the copy-number calls of one chromosome into ``ds[idx]``.

	Every non-blank line of ``fn`` must hold four whitespace-separated
	fields: chromosome, start, end and copy number.  For each line whose
	chromosome equals ``chrm``, every position of the inclusive range
	[start, end] in ``ds[idx]`` is set to the rounded copy number.
	A progress line is written to stderr every 1,000,000 positions, and
	``error`` is called when no line for ``chrm`` was found.

	Raises:
		ValueError: if a non-blank line does not have exactly four fields
			or its numeric fields cannot be parsed.
	"""
	log("Loading calls from %s; idx=%s" % (fn, idx))
	chrm_found = False
	nbp = 0
	fd = drdcommon.xopen(fn)
	try:
		for l in fd:
			fields = l.split()
			if not fields:
				# Tolerate blank lines (e.g. a trailing newline at EOF).
				continue
			c, start, end, cnv = fields
			if c != chrm:
				continue
			chrm_found = True
			# The rounded copy number is the same for the whole interval,
			# so compute it once instead of once per base.
			copies = round(float(cnv))
			for i in range(int(start), int(end)+1):
				if nbp % 1000000 == 0:
					sys.stderr.write("MEM: %s nbp: %s\r" % (drdcommon.memory_usage(), nbp))
				ds[idx][i] = copies
				nbp += 1
	finally:
		# Close the call file even if parsing fails half way through.
		fd.close()

	if not chrm_found:
		error("\nCould not find chrm in file. Bailing out.")
	log("\n%s bp loaded" % nbp)
示例#7
0
def loadCalls(ds, fn, idx, chrm):
    """Load the copy-number calls of one chromosome into ``ds[idx]``.

    Every non-blank line of ``fn`` must hold four whitespace-separated
    fields: chromosome, start, end and copy number.  For each line whose
    chromosome equals ``chrm``, every position of the inclusive range
    [start, end] in ``ds[idx]`` is set to the rounded copy number.
    A progress line is written to stderr every 1,000,000 positions, and
    ``error`` is called when no line for ``chrm`` was found.

    Raises:
        ValueError: if a non-blank line does not have exactly four fields
            or its numeric fields cannot be parsed.
    """
    log("Loading calls from %s; idx=%s" % (fn, idx))
    chrm_found = False
    nbp = 0
    fd = drdcommon.xopen(fn)
    try:
        for l in fd:
            fields = l.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            c, start, end, cnv = fields
            if c != chrm:
                continue
            chrm_found = True
            # The rounded copy number is the same for the whole interval,
            # so compute it once instead of once per base.
            copies = round(float(cnv))
            for i in range(int(start), int(end) + 1):
                if nbp % 1000000 == 0:
                    sys.stderr.write("MEM: %s nbp: %s\r" %
                                     (drdcommon.memory_usage(), nbp))
                ds[idx][i] = copies
                nbp += 1
    finally:
        # Close the call file even if parsing fails half way through.
        fd.close()

    if not chrm_found:
        error("\nCould not find chrm in file. Bailing out.")
    log("\n%s bp loaded" % nbp)
示例#8
0
def loadChrm(ds, ref, chrm):
	"""Mark the non-N positions of a reference sequence in ``ds[0]``.

	Reads ``ref`` line by line.  A leading header line equal to
	``'>' + chrm`` is skipped; every following character is treated as one
	base at 1-based position ``i`` and ``ds[0][i]`` is set to 1 unless the
	base is 'N' (case-insensitive).  Reading stops at the first header
	line met after sequence data has started.  A progress line is written
	to stderr every 10,000,000 positions.

	``error`` is called when ``ref`` does not exist.
	"""
	if not os.path.exists(ref):
		# Format the message here, as every other log()/error() call does,
		# instead of passing ``ref`` as a stray second argument.
		error("Cannot find reference file: %s" % ref)

	log("Reading reference genome chrm: %s" % chrm)
	i = 1
	fd = drdcommon.xopen(ref)
	try:
		for l in fd:
			l = l.strip()
			if not l:
				# Blank line: nothing to read, and l[0] would raise IndexError.
				continue
			# NOTE(review): a first header that is not exactly '>' + chrm
			# falls through and is scanned as sequence -- confirm that the
			# reference always starts with the wanted record.
			if i == 1 and l[0] == '>' and l[1:] == chrm:
				continue
			if i > 1 and l[0] == '>':
				break

			for bp in l:
				if bp.upper() != 'N':
					ds[0][i] = 1
				if i % 10000000 == 0:
					sys.stderr.write("MEM: %s nbp: %s\r" % (drdcommon.memory_usage(), i))
				i += 1
	finally:
		# Close the reference even when we stop early or parsing fails.
		fd.close()
	# i is the 1-based position of the *next* base, so i - 1 were read.
	log("\n%s bp read." % (i - 1))
示例#9
0
def loadChrm(ds, ref, chrm):
    """Mark the non-N positions of a reference sequence in ``ds[0]``.

    Reads ``ref`` line by line.  A leading header line equal to
    ``'>' + chrm`` is skipped; every following character is treated as one
    base at 1-based position ``i`` and ``ds[0][i]`` is set to 1 unless the
    base is 'N' (case-insensitive).  Reading stops at the first header
    line met after sequence data has started.  A progress line is written
    to stderr every 10,000,000 positions.

    ``error`` is called when ``ref`` does not exist.
    """
    if not os.path.exists(ref):
        # Format the message here, as every other log()/error() call does,
        # instead of passing ``ref`` as a stray second argument.
        error("Cannot find reference file: %s" % ref)

    log("Reading reference genome chrm: %s" % chrm)
    i = 1
    fd = drdcommon.xopen(ref)
    try:
        for l in fd:
            l = l.strip()
            if not l:
                # Blank line: nothing to read, and l[0] would raise IndexError.
                continue
            # NOTE(review): a first header that is not exactly '>' + chrm
            # falls through and is scanned as sequence -- confirm that the
            # reference always starts with the wanted record.
            if i == 1 and l[0] == '>' and l[1:] == chrm:
                continue
            if i > 1 and l[0] == '>':
                break

            for bp in l:
                if bp.upper() != 'N':
                    ds[0][i] = 1
                if i % 10000000 == 0:
                    sys.stderr.write("MEM: %s nbp: %s\r" %
                                     (drdcommon.memory_usage(), i))
                i += 1
    finally:
        # Close the reference even when we stop early or parsing fails.
        fd.close()
    # i is the 1-based position of the *next* base, so i - 1 were read.
    log("\n%s bp read." % (i - 1))