def get_initial_allele_counts(self, fragment):
    '''Get allele counts from the initial time point

    The rows of self.samples are scanned in order and the allele counts of
    the first sample whose allele counts file exists on disk are returned.

    Parameters:
       fragment: fragment/region identifier, passed unchanged to the
                 sample's get_allele_counts_filename and get_allele_counts.

    Returns:
       whatever SamplePat.get_allele_counts(fragment) returns for the first
       sample that has an allele counts file, or None if no sample has one.
    '''
    import os
    from hivwholeseq.patients.samples import SamplePat

    # NOTE(review): "initial" holds only if self.samples is ordered by time;
    # presumably the table is built that way -- confirm against the caller.
    for _, row in self.samples.iterrows():
        sample = SamplePat(row)
        # Skip samples without data instead of letting the loader fail
        if os.path.isfile(sample.get_allele_counts_filename(fragment)):
            return sample.get_allele_counts(fragment)

    # No sample has an allele counts file for this fragment
    return None
# Restrict the sample table to the requested patients or, if no patients
# were given, to the requested sample names. With neither filter set, all
# samples are kept.
if pnames is not None:
    samples = samples.loc[samples.patient.isin(pnames)]
elif samplenames is not None:
    samples = samples.loc[samples.index.isin(samplenames)]

if VERBOSE >= 2:
    print 'samples', samples.index.tolist()

# For every (region, sample) pair, load the allele counts and optionally
# draw them as a scatter plot.
for region in regions:
    for samplename, sample in samples.iterrows():
        # Wrap the table row in the sample class that provides
        # get_allele_counts
        sample = SamplePat(sample)

        if VERBOSE >= 1:
            print region, samplename

        count = sample.get_allele_counts(region, qual_min=qual_min)

        if use_plot:
            # NOTE(review): count is treated as a 2-D array here; presumably
            # rows are alleles and columns are positions -- confirm.
            # x[i, j] = j (column index), color[i, j] = i (row index), so
            # all points from one row of count share a color value.
            x = np.tile(np.arange(count.shape[1]), (count.shape[0], 1))
            color = np.tile(np.arange(count.shape[0]), (count.shape[1], 1)).T

            fig, ax = plt.subplots(figsize=(12, 6))
            # The 0.1 offset (with the 0.09 lower y limit below) presumably
            # keeps zero counts visible on the logarithmic y axis.
            ax.scatter(x, count + 0.1, lw=2, c=color)
            ax.set_xlabel('Position [bp]')
            ax.set_ylabel('Coverage')
            ax.set_xlim(-1, count.shape[-1])
            ax.set_ylim(ymin=0.09)
            ax.set_yscale('log')
            ax.grid(True)
if VERBOSE >= 1: print samplename sample = SamplePat(sample) pname = sample.patient conss_genomewide = SeqIO.read( get_initial_reference_filename(pname, 'genomewide'), 'fasta') # Collect the allele counts (where possible) acs = [] for fragment in ['F' + str(i) for i in xrange(1, 7)]: try: ref = ''.join( SeqIO.read(get_initial_reference_filename(pname, fragment), 'fasta')) ac = sample.get_allele_counts(fragment, merge_read_types=False) acs.append((fragment, ref, ac)) except IOError: continue if not len(acs): if VERBOSE >= 1: print 'No data found: skipping' continue # Merge allele counts ac = merge_allele_counts(conss_genomewide, acs, VERBOSE=VERBOSE) if save_to_file: fn_out = sample.get_allele_counts_filename('genomewide') np.save(fn_out, ac) if VERBOSE >= 1:
# Restrict the sample table to the requested patients or, if no patients
# were given, to the requested sample names. With neither filter set, all
# samples are kept.
if pnames is not None:
    samples = samples.loc[samples.patient.isin(pnames)]
elif samplenames is not None:
    samples = samples.loc[samples.index.isin(samplenames)]

if VERBOSE >= 2:
    print 'samples', samples.index.tolist()

# For every (region, sample) pair, load the allele counts and optionally
# draw them as a scatter plot titled with the sample name and region.
for region in regions:
    for samplename, sample in samples.iterrows():
        # Wrap the table row in the sample class that provides
        # get_allele_counts
        sample = SamplePat(sample)

        if VERBOSE >= 1:
            print region, samplename

        count = sample.get_allele_counts(region, qual_min=qual_min)

        if use_plot:
            # NOTE(review): count is treated as a 2-D array here; presumably
            # rows are alleles and columns are positions -- confirm.
            # x[i, j] = j (column index), color[i, j] = i (row index), so
            # all points from one row of count share a color value.
            x = np.tile(np.arange(count.shape[1]), (count.shape[0], 1))
            color = np.tile(np.arange(count.shape[0]), (count.shape[1], 1)).T

            fig, ax = plt.subplots(figsize=(12, 6))
            # The 0.1 offset (with the 0.09 lower y limit below) presumably
            # keeps zero counts visible on the logarithmic y axis.
            ax.scatter(x, count + 0.1, lw=2, c=color)
            ax.set_xlabel('Position [bp]')
            ax.set_ylabel('Coverage')
            ax.set_xlim(-1, count.shape[-1])
            ax.set_ylim(ymin=0.09)
            ax.set_yscale('log')
            ax.grid(True)
            ax.set_title(samplename+', '+region)
samples = samples.loc[samples.patient.isin(pnames)] elif samplenames is not None: samples = samples.loc[samples.index.isin(samplenames)] if VERBOSE >= 2: print 'samples', samples.index.tolist() data = defaultdict(dict) for samplename, sample in samples.iterrows(): sample = SamplePat(sample) if VERBOSE >= 1: print samplename for (fr1, fr2) in izip(fragments[:-1], fragments[1:]): try: ac1 = sample.get_allele_counts(fr1) ac2 = sample.get_allele_counts(fr2) except IOError: continue if VERBOSE >= 2: print fr1, fr2 # Filter positions by coverage covmin = 100 indcm1 = (ac1.sum(axis=0) >= covmin) indcm2 = (ac2.sum(axis=0) >= covmin) ac1 = ac1[:, indcm1] ac2 = ac2[:, indcm2] c1 = alpha[ac1.argmax(axis=0)]
print 'samples', samples.index.tolist() for samplename, sample in samples.iterrows(): if VERBOSE >= 1: print samplename sample = SamplePat(sample) pname = sample.patient conss_genomewide = SeqIO.read(get_initial_reference_filename(pname, 'genomewide'), 'fasta') # Collect the allele counts (where possible) acs = [] for fragment in ['F'+str(i) for i in xrange(1, 7)]: try: ref = ''.join(SeqIO.read(get_initial_reference_filename(pname, fragment), 'fasta')) ac = sample.get_allele_counts(fragment, merge_read_types=False) acs.append((fragment, ref, ac)) except IOError: continue if not len(acs): if VERBOSE >= 1: print 'No data found: skipping' continue # Merge allele counts ac = merge_allele_counts(conss_genomewide, acs, VERBOSE=VERBOSE) if save_to_file: fn_out = sample.get_allele_counts_filename('genomewide') np.save(fn_out, ac) if VERBOSE >= 1: