示例#1
0
 def get_initial_allele_counts(self, fragment):
     '''Get allele counts from the initial time point.

     The rows of ``self.samples`` are scanned in their stored order and the
     counts of the first sample whose allele-count file exists on disk are
     returned.
     NOTE(review): "initial time point" presumes ``self.samples`` is ordered
     by time - confirm against the code that builds it.

     Parameters:
        fragment: fragment/region identifier, passed unchanged to
            ``get_allele_counts_filename`` and ``get_allele_counts``.

     Returns:
        The result of ``get_allele_counts(fragment)`` for the first sample
        that has an allele-count file, or None if no sample has one.
     '''
     import os
     from hivwholeseq.patients.samples import SamplePat

     # iterrows() visits the rows in the same positional order as indexing
     # with .iloc[0], .iloc[1], ... and does not rely on Python-2-only xrange.
     for _, row in self.samples.iterrows():
         sample = SamplePat(row)
         if os.path.isfile(sample.get_allele_counts_filename(fragment)):
             return sample.get_allele_counts(fragment)

     # No sample has allele counts on disk for this fragment.
     return None
示例#2
0
 def get_initial_allele_counts(self, fragment):
     '''Get allele counts from the initial time point.

     The rows of ``self.samples`` are scanned in their stored order and the
     counts of the first sample whose allele-count file exists on disk are
     returned.
     NOTE(review): "initial time point" presumes ``self.samples`` is ordered
     by time - confirm against the code that builds it.

     Parameters:
        fragment: fragment/region identifier, passed unchanged to
            ``get_allele_counts_filename`` and ``get_allele_counts``.

     Returns:
        The result of ``get_allele_counts(fragment)`` for the first sample
        that has an allele-count file, or None if no sample has one.
     '''
     import os
     from hivwholeseq.patients.samples import SamplePat

     # iterrows() visits the rows in the same positional order as indexing
     # with .iloc[0], .iloc[1], ... and does not rely on Python-2-only xrange.
     for _, row in self.samples.iterrows():
         sample = SamplePat(row)
         if os.path.isfile(sample.get_allele_counts_filename(fragment)):
             return sample.get_allele_counts(fragment)

     # No sample has allele counts on disk for this fragment.
     return None
示例#3
0
    if pnames is not None:
        samples = samples.loc[samples.patient.isin(pnames)]
    elif samplenames is not None:
        samples = samples.loc[samples.index.isin(samplenames)]

    if VERBOSE >= 2:
        print 'samples', samples.index.tolist()

    for region in regions:
        for samplename, sample in samples.iterrows():
            sample = SamplePat(sample)

            if VERBOSE >= 1:
                print region, samplename

            count = sample.get_allele_counts(region, qual_min=qual_min)

            if use_plot:
                x = np.tile(np.arange(count.shape[1]), (count.shape[0], 1))
                color = np.tile(np.arange(count.shape[0]),
                                (count.shape[1], 1)).T

                fig, ax = plt.subplots(figsize=(12, 6))

                ax.scatter(x, count + 0.1, lw=2, c=color)
                ax.set_xlabel('Position [bp]')
                ax.set_ylabel('Coverage')
                ax.set_xlim(-1, count.shape[-1])
                ax.set_ylim(ymin=0.09)
                ax.set_yscale('log')
                ax.grid(True)
        if VERBOSE >= 1:
            print samplename

        sample = SamplePat(sample)
        pname = sample.patient
        conss_genomewide = SeqIO.read(
            get_initial_reference_filename(pname, 'genomewide'), 'fasta')

        # Collect the allele counts (where possible)
        acs = []
        for fragment in ['F' + str(i) for i in xrange(1, 7)]:
            try:
                ref = ''.join(
                    SeqIO.read(get_initial_reference_filename(pname, fragment),
                               'fasta'))
                ac = sample.get_allele_counts(fragment, merge_read_types=False)
                acs.append((fragment, ref, ac))
            except IOError:
                continue

        if not len(acs):
            if VERBOSE >= 1:
                print 'No data found: skipping'
            continue

        # Merge allele counts
        ac = merge_allele_counts(conss_genomewide, acs, VERBOSE=VERBOSE)
        if save_to_file:
            fn_out = sample.get_allele_counts_filename('genomewide')
            np.save(fn_out, ac)
            if VERBOSE >= 1:
    if pnames is not None:
        samples = samples.loc[samples.patient.isin(pnames)]
    elif samplenames is not None:
        samples = samples.loc[samples.index.isin(samplenames)]

    if VERBOSE >= 2:
        print 'samples', samples.index.tolist()

    for region in regions:
        for samplename, sample in samples.iterrows():
            sample = SamplePat(sample)

            if VERBOSE >= 1:
                print region, samplename

            count = sample.get_allele_counts(region, qual_min=qual_min)

            if use_plot:
                x = np.tile(np.arange(count.shape[1]), (count.shape[0], 1))
                color = np.tile(np.arange(count.shape[0]), (count.shape[1], 1)).T

                fig, ax = plt.subplots(figsize=(12, 6))
                
                ax.scatter(x, count + 0.1, lw=2, c=color)
                ax.set_xlabel('Position [bp]')
                ax.set_ylabel('Coverage')
                ax.set_xlim(-1, count.shape[-1])
                ax.set_ylim(ymin=0.09)
                ax.set_yscale('log')
                ax.grid(True)
                ax.set_title(samplename+', '+region)
        samples = samples.loc[samples.patient.isin(pnames)]
    elif samplenames is not None:
        samples = samples.loc[samples.index.isin(samplenames)]

    if VERBOSE >= 2:
        print 'samples', samples.index.tolist()

    data = defaultdict(dict)
    for samplename, sample in samples.iterrows():
        sample = SamplePat(sample)
        if VERBOSE >= 1:
            print samplename

        for (fr1, fr2) in izip(fragments[:-1], fragments[1:]):
            try:
                ac1 = sample.get_allele_counts(fr1)
                ac2 = sample.get_allele_counts(fr2)
            except IOError:
                continue

            if VERBOSE >= 2:
                print fr1, fr2

            # Filter positions by coverage
            covmin = 100
            indcm1 = (ac1.sum(axis=0) >= covmin)
            indcm2 = (ac2.sum(axis=0) >= covmin)
            ac1 = ac1[:, indcm1]
            ac2 = ac2[:, indcm2]

            c1 = alpha[ac1.argmax(axis=0)]
        print 'samples', samples.index.tolist()

    for samplename, sample in samples.iterrows():
        if VERBOSE >= 1:
            print samplename

        sample = SamplePat(sample)
        pname = sample.patient
        conss_genomewide = SeqIO.read(get_initial_reference_filename(pname, 'genomewide'), 'fasta')

        # Collect the allele counts (where possible)
        acs = []
        for fragment in ['F'+str(i) for i in xrange(1, 7)]:
            try:
                ref = ''.join(SeqIO.read(get_initial_reference_filename(pname, fragment), 'fasta'))
                ac = sample.get_allele_counts(fragment, merge_read_types=False)
                acs.append((fragment, ref, ac))
            except IOError:
                continue

        if not len(acs):
            if VERBOSE >= 1:
                print 'No data found: skipping'
            continue

        # Merge allele counts
        ac = merge_allele_counts(conss_genomewide, acs, VERBOSE=VERBOSE)
        if save_to_file:
            fn_out = sample.get_allele_counts_filename('genomewide')
            np.save(fn_out, ac)
            if VERBOSE >= 1: