# Progress message: report the protein and sample being processed.
if VERBOSE >= 1:
    print protein, samplename

# Rebind `sample` to the SamplePat object built from the incoming record.
sample = SamplePat(sample)

# NOTE: How do we find what fragment covers the protein? Well, a
# protein can happily cross fragments. Since each codon is independent,
# we should iterate over codons. We do not do that for efficiency
# reasons. Instead, we identify all potential fragments and split the
# protein into full codon chunks covered by a single fragment.
fragment_rois = sample.get_fragments_covered(
    protein, include_coordinates=True)

refseq = sample.get_reference(protein)

# Output filename for the counts; type='aa' presumably selects the
# amino-acid flavour of the allele counts file -- TODO confirm.
fn_out = sample.get_allele_counts_filename(protein, PCR=PCR,
                                           qual_min=qual_min, type='aa')

from hivwholeseq.utils.sequence import alphaa

# Integer matrix of zeros: one row per entry of `alphaa`, one column per
# complete triplet of `refseq` positions (len(refseq) // 3).
count = np.zeros((len(alphaa), len(refseq) // 3), int)

for frroi in fragment_rois:
    # Each entry is read through its 'name', 'fragment' and 'roi' keys.
    fragment = frroi['name']
    # Presumably (start, end) in fragment coordinates -- TODO confirm.
    start_fr, end_fr = frroi['fragment']
    # Presumably (start, end) in protein coordinates -- TODO confirm.
    start, end = frroi['roi']

    # Check that we align with codons: a non-zero remainder means that
    # `start` is not a multiple of 3.
    rf = start % 3
    if rf:
samples = lssp() if pnames is not None: samples = samples.loc[samples.patient.isin(pnames)] elif samplenames is not None: samples = samples.loc[samples.index.isin(samplenames)] if VERBOSE >= 2: print 'samples', samples.index.tolist() for samplename, sample in samples.iterrows(): if VERBOSE >= 1: print samplename sample = SamplePat(sample) pname = sample.patient ref = sample.get_reference('genomewide', 'gb') # Collect the insertions (where possible) ics = {} for fragment in ['F' + str(i) for i in xrange(1, 7)]: try: ic = sample.get_insertions(fragment, merge_read_types=False) except IOError: continue start = find_annotation(ref, fragment).location.nofuzzy_start ics[(fragment, start)] = ic if not len(ics): if VERBOSE >= 1: print 'No data found: skipping' continue
samples = lssp() if pnames is not None: samples = samples.loc[samples.patient.isin(pnames)] elif samplenames is not None: samples = samples.loc[samples.index.isin(samplenames)] if VERBOSE >= 2: print "samples", samples.index.tolist() for samplename, sample in samples.iterrows(): if VERBOSE >= 1: print samplename sample = SamplePat(sample) pname = sample.patient ref = sample.get_reference("genomewide", "gb") # Collect the insertions (where possible) ics = {} for fragment in ["F" + str(i) for i in xrange(1, 7)]: try: ic = sample.get_insertions(fragment, merge_read_types=False) except IOError: continue start = find_annotation(ref, fragment).location.nofuzzy_start ics[(fragment, start)] = ic if not len(ics): if VERBOSE >= 1: print "No data found: skipping" continue
# Progress message: report the protein and sample being processed.
if VERBOSE >= 1:
    print protein, samplename

# Rebind `sample` to the SamplePat object built from the incoming record.
sample = SamplePat(sample)

# NOTE: How do we find what fragment covers the protein? Well, a
# protein can happily cross fragments. Since each codon is independent,
# we should iterate over codons. We do not do that for efficiency
# reasons. Instead, we identify all potential fragments and split the
# protein into full codon chunks covered by a single fragment.
fragment_rois = sample.get_fragments_covered(protein, include_coordinates=True)

refseq = sample.get_reference(protein)

# Output filename for the counts; type='aa' presumably selects the
# amino-acid flavour of the allele counts file -- TODO confirm.
fn_out = sample.get_allele_counts_filename(protein, PCR=PCR,
                                           qual_min=qual_min, type='aa')

from hivwholeseq.utils.sequence import alphaa

# Integer matrix of zeros: one row per entry of `alphaa`, one column per
# complete triplet of `refseq` positions (len(refseq) // 3).
count = np.zeros((len(alphaa), len(refseq) // 3), int)

for frroi in fragment_rois:
    # Each entry is read through its 'name', 'fragment' and 'roi' keys.
    fragment = frroi['name']
    # Presumably (start, end) in fragment coordinates -- TODO confirm.
    start_fr, end_fr = frroi['fragment']
    # Presumably (start, end) in protein coordinates -- TODO confirm.
    start, end = frroi['roi']

    # Check that we align with codons: a non-zero remainder means that
    # `start` is not a multiple of 3.
    rf = start % 3
    if rf:
        # Move the fragment-side start forward by the distance from
        # `start` to the next multiple of 3.
        # NOTE(review): `start` itself is not shifted in the visible
        # lines -- confirm it is advanced by the same amount further on.
        start_fr += 3 - rf