示例#1
0
            if VERBOSE >= 1:
                print protein, samplename

            sample = SamplePat(sample)

            # NOTE: How do we find which fragment covers the protein? A protein
            # can span more than one fragment. Since each codon is independent,
            # we could simply iterate over codons, but we avoid that for
            # efficiency reasons. Instead, we identify all fragments that may
            # cover the protein and split it into chunks of whole codons, each
            # covered by a single fragment.
            fragment_rois = sample.get_fragments_covered(
                protein, include_coordinates=True)

            refseq = sample.get_reference(protein)
            fn_out = sample.get_allele_counts_filename(protein,
                                                       PCR=PCR,
                                                       qual_min=qual_min,
                                                       type='aa')

            from hivwholeseq.utils.sequence import alphaa
            count = np.zeros((len(alphaa), len(refseq) // 3), int)
            for frroi in fragment_rois:
                fragment = frroi['name']
                start_fr, end_fr = frroi['fragment']
                start, end = frroi['roi']

                # Check that we align with codons
                rf = start % 3
                if rf:
    samples = lssp()
    if pnames is not None:
        samples = samples.loc[samples.patient.isin(pnames)]
    elif samplenames is not None:
        samples = samples.loc[samples.index.isin(samplenames)]

    if VERBOSE >= 2:
        print 'samples', samples.index.tolist()

    for samplename, sample in samples.iterrows():
        if VERBOSE >= 1:
            print samplename

        sample = SamplePat(sample)
        pname = sample.patient
        ref = sample.get_reference('genomewide', 'gb')

        # Collect the insertions (where possible)
        ics = {}
        for fragment in ['F' + str(i) for i in xrange(1, 7)]:
            try:
                ic = sample.get_insertions(fragment, merge_read_types=False)
            except IOError:
                continue
            start = find_annotation(ref, fragment).location.nofuzzy_start
            ics[(fragment, start)] = ic

        if not len(ics):
            if VERBOSE >= 1:
                print 'No data found: skipping'
            continue
    samples = lssp()
    if pnames is not None:
        samples = samples.loc[samples.patient.isin(pnames)]
    elif samplenames is not None:
        samples = samples.loc[samples.index.isin(samplenames)]

    if VERBOSE >= 2:
        print "samples", samples.index.tolist()

    for samplename, sample in samples.iterrows():
        if VERBOSE >= 1:
            print samplename

        sample = SamplePat(sample)
        pname = sample.patient
        ref = sample.get_reference("genomewide", "gb")

        # Collect the insertions (where possible)
        ics = {}
        for fragment in ["F" + str(i) for i in xrange(1, 7)]:
            try:
                ic = sample.get_insertions(fragment, merge_read_types=False)
            except IOError:
                continue
            start = find_annotation(ref, fragment).location.nofuzzy_start
            ics[(fragment, start)] = ic

        if not len(ics):
            if VERBOSE >= 1:
                print "No data found: skipping"
            continue
            if VERBOSE >= 1:
                print protein, samplename

            sample = SamplePat(sample)

            # NOTE: How do we find which fragment covers the protein? A protein
            # can span more than one fragment. Since each codon is independent,
            # we could simply iterate over codons, but we avoid that for
            # efficiency reasons. Instead, we identify all fragments that may
            # cover the protein and split it into chunks of whole codons, each
            # covered by a single fragment.
            fragment_rois = sample.get_fragments_covered(protein,
                                                         include_coordinates=True)
            
            refseq = sample.get_reference(protein)
            fn_out = sample.get_allele_counts_filename(protein, PCR=PCR,
                                                       qual_min=qual_min,
                                                       type='aa')

            from hivwholeseq.utils.sequence import alphaa
            count = np.zeros((len(alphaa), len(refseq) // 3), int)
            for frroi in fragment_rois:
                fragment = frroi['name']
                start_fr, end_fr = frroi['fragment']
                start, end = frroi['roi']

                # Check that we align with codons
                rf = start % 3
                if rf:
                    start_fr += 3 - rf