示例#1
0
 def get_initial_allele_counts(self, fragment):
     '''Return allele counts from the first sample that has them on disk.

     Rows of self.samples are visited in their stored order; the first
     one whose allele counts file exists for this fragment is loaded and
     returned. Treating that sample as the "initial time point" presumes
     the table is sorted by time - TODO confirm against the loader.

     Parameters:
        fragment: passed unchanged to SamplePat.get_allele_counts_filename
                  and SamplePat.get_allele_counts.

     Returns:
        Whatever SamplePat.get_allele_counts returns for the first sample
        with an existing file, or None if no sample has one.
     '''
     from os.path import isfile
     from hivwholeseq.patients.samples import SamplePat

     n_samples = len(self.samples)
     for idx in xrange(n_samples):
         candidate = SamplePat(self.samples.iloc[idx])
         fn_counts = candidate.get_allele_counts_filename(fragment)
         # Skip samples whose counts file is not present on disk
         if not isfile(fn_counts):
             continue
         return candidate.get_allele_counts(fragment)
示例#2
0
 def get_initial_allele_counts(self, fragment):
     '''Get allele counts from the earliest sample with a counts file.

     Walks self.samples positionally and stops at the first row whose
     allele counts file for the given fragment exists on disk. That this
     row is the initial time point assumes the table is time-ordered
     (not verifiable from this method alone).

     Parameters:
        fragment: forwarded as-is to the SamplePat filename and loading
                  methods.

     Returns:
        The result of SamplePat.get_allele_counts for the first sample
        found; None when no sample has an existing counts file.
     '''
     from os.path import isfile
     from hivwholeseq.patients.samples import SamplePat

     position = 0
     total = len(self.samples)
     while position < total:
         current = SamplePat(self.samples.iloc[position])
         if isfile(current.get_allele_counts_filename(fragment)):
             return current.get_allele_counts(fragment)
         position += 1
     # No sample had a counts file: fall through and return None
     return None
    if not fragments:
        fragments = ['F'+str(i) for i in xrange(1, 7)]
    if VERBOSE >= 3:
        print 'fragments', fragments

    for fragment in fragments:
        counts = []
        for samplename, sample in samples.iterrows():
            if submit:
                fork_self(samplename, fragment, VERBOSE=VERBOSE, qual_min=qual_min)
                continue

            if VERBOSE >= 1:
                print fragment, samplename

            sample = SamplePat(sample)
            pname = sample.patient
            refseq = SeqIO.read(get_initial_reference_filename(pname, fragment), 'fasta')

            fn = sample.get_mapped_filtered_filename(fragment, PCR=PCR)
            if not os.path.isfile(fn):
                warn('No BAM file found', NoDataWarning)
                continue

            count, _ = gac(fn, len(refseq), qual_min=qual_min, VERBOSE=VERBOSE)
            counts.append(count)

            if save_to_file:
                fn_out = sample.get_allele_counts_filename(fragment, PCR=PCR,
                                                           qual_min=qual_min)
                count.dump(fn_out)
    VERBOSE = args.verbose
    qual_min = args.qualmin
    use_plot = args.plot

    samples = lssp()
    if pnames is not None:
        samples = samples.loc[samples.patient.isin(pnames)]
    elif samplenames is not None:
        samples = samples.loc[samples.index.isin(samplenames)]

    if VERBOSE >= 2:
        print 'samples', samples.index.tolist()

    for region in regions:
        for samplename, sample in samples.iterrows():
            sample = SamplePat(sample)

            if VERBOSE >= 1:
                print region, samplename

            count = sample.get_allele_counts(region, qual_min=qual_min)

            if use_plot:
                x = np.tile(np.arange(count.shape[1]), (count.shape[0], 1))
                color = np.tile(np.arange(count.shape[0]), (count.shape[1], 1)).T

                fig, ax = plt.subplots(figsize=(12, 6))
                
                ax.scatter(x, count + 0.1, lw=2, c=color)
                ax.set_xlabel('Position [bp]')
                ax.set_ylabel('Coverage')
        for fragment in fragments:
            for samplename, sample in samples.iterrows():
                fork_self(samplename,
                          fragment,
                          VERBOSE=VERBOSE,
                          qual_min=qual_min,
                          PCR=PCR,
                          maxreads=maxreads,
                          use_tests=use_tests)
        sys.exit()

    counts_all = []
    for fragment in fragments:
        counts = []
        for samplename, sample in samples.iterrows():
            sample = SamplePat(sample)
            pname = sample.patient

            if VERBOSE >= 2:
                print pname, fragment, samplename

            refseq = SeqIO.read(
                get_initial_reference_filename(pname, fragment), 'fasta')

            fn_out = sample.get_allele_cocounts_filename(fragment,
                                                         PCR=PCR,
                                                         qual_min=qual_min,
                                                         compressed=True)
            fn = sample.get_mapped_filtered_filename(
                fragment, PCR=PCR, decontaminated=True)  #FIXME
            if save_to_file:
    VERBOSE = args.verbose
    qual_min = args.qualmin
    use_plot = args.plot

    samples = lssp()
    if pnames is not None:
        samples = samples.loc[samples.patient.isin(pnames)]
    elif samplenames is not None:
        samples = samples.loc[samples.index.isin(samplenames)]

    if VERBOSE >= 2:
        print 'samples', samples.index.tolist()

    for protein in proteins:
        for samplename, sample in samples.iterrows():
            sample = SamplePat(sample)

            if VERBOSE >= 1:
                print protein, samplename

            count = sample.get_allele_counts_aa(protein, qual_min=qual_min)

            if use_plot:
                x = np.tile(np.arange(count.shape[1]), (count.shape[0], 1))
                color = np.tile(np.arange(count.shape[0]), (count.shape[1], 1)).T

                fig, ax = plt.subplots(figsize=(12, 6))
                
                ax.scatter(x, count + 0.1, lw=2, c=color)
                ax.set_xlabel('Position [aa]')
                ax.set_ylabel('Coverage')
示例#7
0
        print 'fragments', fragments

    for fragment in fragments:
        inses = []
        for samplename, sample in samples.iterrows():
            if submit:
                fork_self(samplename,
                          fragment,
                          VERBOSE=VERBOSE,
                          qual_min=qual_min)
                continue

            if VERBOSE >= 1:
                print fragment, samplename

            sample = SamplePat(sample)
            pname = sample.patient
            refseq = SeqIO.read(
                get_initial_reference_filename(pname, fragment), 'fasta')

            fn = sample.get_mapped_filtered_filename(fragment, PCR=PCR)
            if not os.path.isfile(fn):
                warn('No BAM file found', NoDataWarning)
                continue

            _, inse = gac(fn, len(refseq), qual_min=qual_min, VERBOSE=VERBOSE)
            inses.append(inse)

            if save_to_file:
                fn_out = sample.get_insertions_filename(fragment,
                                                        PCR=PCR,
示例#8
0
 def itersamples(self):
     '''Yield every row of self.samples wrapped in a SamplePat.

     This is a generator: rows are taken from self.samples.iterrows() in
     order, the index label of each row is discarded, and the row itself
     is handed to the SamplePat constructor.
     '''
     from hivwholeseq.patients.samples import SamplePat

     for _label, row in self.samples.iterrows():
         wrapped = SamplePat(row)
         yield wrapped
示例#9
0
    args = parser.parse_args()
    pnames = args.patients
    samplenames = args.samples
    VERBOSE = args.verbose
    use_save = args.save

    fragments = ['F' + str(i + 1) for i in xrange(6)]

    samples = load_samples_sequenced()
    if pnames is not None:
        samples = samples.loc[samples.patient.isin(pnames)]
    elif samplenames is not None:
        samples = samples.loc[samples.index.isin(samplenames)]

    for samplename, sample in samples.iterrows():
        sample = SamplePat(sample)
        if VERBOSE >= 1:
            print samplename

        dist_hists = []
        samples_seq = sample.get_sequenced_samples()
        samples_seq = samples_seq.loc[samples_seq.PCR == 1]
        for samplename_seq, sample_seq in samples_seq.iterrows():
            sample_seq = SampleSeq(sample_seq)
            data_folder = sample_seq.seqrun_folder
            adaID = sample_seq.adapter

            for fragment in fragments:
                try:
                    dist_hist = get_distance_histogram(data_folder,
                                                       adaID,
示例#10
0
    VERBOSE = args.verbose
    use_save = args.save
    use_plot = args.plot

    samples = load_samples_sequenced()
    if pnames is not None:
        samples = samples.loc[samples.patient.isin(pnames)]
    elif samplenames is not None:
        samples = samples.loc[samples.index.isin(samplenames)]

    if VERBOSE >= 2:
        print 'samples', samples.index.tolist()

    data = defaultdict(dict)
    for samplename, sample in samples.iterrows():
        sample = SamplePat(sample)
        if VERBOSE >= 1:
            print samplename

        for (fr1, fr2) in izip(fragments[:-1], fragments[1:]):
            try:
                ac1 = sample.get_allele_counts(fr1)
                ac2 = sample.get_allele_counts(fr2)
            except IOError:
                continue

            if VERBOSE >= 2:
                print fr1, fr2

            # Filter positions by coverage
            covmin = 100
    if not fragments:
        fragments = ['F'+str(i) for i in xrange(1, 7)]
    if VERBOSE >= 3:
        print 'fragments', fragments

    if submit:
        for fragment in fragments:
            for samplename, sample in samples.iterrows():
                fork_self(samplename, fragment, VERBOSE=VERBOSE,
                          qual_min=qual_min, PCR=PCR)
        sys.exit()


    for samplename, sample in samples.iterrows():
        sample = SamplePat(sample)
        pname = sample.patient

        for fragment in fragments:

            if VERBOSE >= 1:
                print pname, samplename, fragment

            fn = sample.get_allele_cocounts_filename(fragment, PCR=PCR,
                                                     qual_min=qual_min,
                                                     compressed=False)
            
            fn_out = sample.get_allele_cocounts_filename(fragment, PCR=PCR,
                                                         qual_min=qual_min,
                                                         compressed=True)
    fragments = args.fragments
    submit = args.submit
    VERBOSE = args.verbose
    n_pairs = args.maxreads
    summary = args.summary
    PCR = args.PCR

    # Collect all sequenced samples from patients
    samples_pat = lssp()
    if pnames is not None:
        samples_seq = []
        for pname in pnames:
            patient = load_patient(pname)
            patient.discard_nonsequenced_samples()
            for samplename_pat, sample_pat in patient.samples.iterrows():
                sample_pat = SamplePat(sample_pat)
                samples_seq.append(sample_pat.samples_seq)
        samples_seq = pd.concat(samples_seq)

    elif samplenames is not None:
        samples_seq = lss()
        ind = samples_pat.index.isin(samplenames)
        samplenames_pat = samples_pat.index[ind]
        samples_seq = samples_seq.loc[samples_seq['patient sample'].isin(samplenames_pat)]

    else:
        samples_seq = lss()
        samples_seq = samples_seq.loc[samples_seq['patient sample'].isin(samples_pat.index)]


    if PCR != 'all':
    if VERBOSE >= 3:
        print 'fragments', fragments

    if submit:
        for fragment in fragments:
            for samplename, sample in samples.iterrows():
                fork_self(samplename, fragment, VERBOSE=VERBOSE,
                          qual_min=qual_min, PCR=PCR,
                          maxreads=maxreads, use_tests=use_tests)
        sys.exit()

    counts_all = []
    for fragment in fragments:
        counts = []
        for samplename, sample in samples.iterrows():
            sample = SamplePat(sample)
            pname = sample.patient

            if VERBOSE >= 2:
                print pname, fragment, samplename

            refseq = SeqIO.read(get_initial_reference_filename(pname, fragment), 'fasta')

            fn_out = sample.get_allele_cocounts_filename(fragment, PCR=PCR,
                                                         qual_min=qual_min,
                                                         compressed=True)
            fn = sample.get_mapped_filtered_filename(fragment, PCR=PCR,
                                                     decontaminated=True) #FIXME
            if save_to_file:
                cocount = gac(fn, len(refseq), 
                              maxreads=maxreads,
示例#14
0
        print 'fragments', fragments

    for fragment in fragments:
        counts = []
        for samplename, sample in samples.iterrows():
            if submit:
                fork_self(samplename,
                          fragment,
                          VERBOSE=VERBOSE,
                          qual_min=qual_min)
                continue

            if VERBOSE >= 1:
                print fragment, samplename

            sample = SamplePat(sample)
            pname = sample.patient
            refseq = SeqIO.read(
                get_initial_reference_filename(pname, fragment), 'fasta')

            fn = sample.get_mapped_filtered_filename(fragment, PCR=PCR)
            if not os.path.isfile(fn):
                warn('No BAM file found', NoDataWarning)
                continue

            count, _ = gac(fn, len(refseq), qual_min=qual_min, VERBOSE=VERBOSE)
            counts.append(count)

            if save_to_file:
                fn_out = sample.get_allele_counts_filename(fragment,
                                                           PCR=PCR,
    if VERBOSE >= 2:
        print 'samples', samples.index.tolist()

    counts_all = []
    for protein in proteins:
        counts = []
        for samplename, sample in samples.iterrows():
            if submit:
                fork_self(samplename, protein, VERBOSE=VERBOSE, qual_min=qual_min)
                continue

            if VERBOSE >= 1:
                print protein, samplename

            sample = SamplePat(sample)

            # NOTE: How do we find what fragment covers the protein? Well, a
            # protein can happily cross fragments. Since each
            # codon is independent, we should iterate over codons. We do not
            # do that for efficiency reasons. Instead, we identify all potential
            # fragments and split the protein into full codon chunks covered by
            # a single fragment.
            fragment_rois = sample.get_fragments_covered(protein,
                                                         include_coordinates=True)
            
            refseq = sample.get_reference(protein)
            fn_out = sample.get_allele_counts_filename(protein, PCR=PCR,
                                                       qual_min=qual_min,
                                                       type='aa')
示例#16
0
    VERBOSE = args.verbose
    qual_min = args.qualmin
    use_plot = args.plot

    samples = lssp()
    if pnames is not None:
        samples = samples.loc[samples.patient.isin(pnames)]
    elif samplenames is not None:
        samples = samples.loc[samples.index.isin(samplenames)]

    if VERBOSE >= 2:
        print 'samples', samples.index.tolist()

    for protein in proteins:
        for samplename, sample in samples.iterrows():
            sample = SamplePat(sample)

            if VERBOSE >= 1:
                print protein, samplename

            count = sample.get_allele_counts_aa(protein, qual_min=qual_min)

            if use_plot:
                x = np.tile(np.arange(count.shape[1]), (count.shape[0], 1))
                color = np.tile(np.arange(count.shape[0]),
                                (count.shape[1], 1)).T

                fig, ax = plt.subplots(figsize=(12, 6))

                ax.scatter(x, count + 0.1, lw=2, c=color)
                ax.set_xlabel('Position [aa]')
示例#17
0
    VERBOSE = args.verbose
    qual_min = args.qualmin
    use_plot = args.plot

    samples = lssp()
    if pnames is not None:
        samples = samples.loc[samples.patient.isin(pnames)]
    elif samplenames is not None:
        samples = samples.loc[samples.index.isin(samplenames)]

    if VERBOSE >= 2:
        print 'samples', samples.index.tolist()

    for region in regions:
        for samplename, sample in samples.iterrows():
            sample = SamplePat(sample)

            if VERBOSE >= 1:
                print region, samplename

            count = sample.get_allele_counts(region, qual_min=qual_min)

            if use_plot:
                x = np.tile(np.arange(count.shape[1]), (count.shape[0], 1))
                color = np.tile(np.arange(count.shape[0]),
                                (count.shape[1], 1)).T

                fig, ax = plt.subplots(figsize=(12, 6))

                ax.scatter(x, count + 0.1, lw=2, c=color)
                ax.set_xlabel('Position [bp]')
    PCR = args.PCR

    samples = lssp()
    if pnames is not None:
        samples = samples.loc[samples.patient.isin(pnames)]
    elif samplenames is not None:
        samples = samples.loc[samples.index.isin(samplenames)]

    if VERBOSE >= 2:
        print "samples", samples.index.tolist()

    for samplename, sample in samples.iterrows():
        if VERBOSE >= 1:
            print samplename

        sample = SamplePat(sample)
        pname = sample.patient
        ref = sample.get_reference("genomewide", "gb")

        # Collect the insertions (where possible)
        ics = {}
        for fragment in ["F" + str(i) for i in xrange(1, 7)]:
            try:
                ic = sample.get_insertions(fragment, merge_read_types=False)
            except IOError:
                continue
            start = find_annotation(ref, fragment).location.nofuzzy_start
            ics[(fragment, start)] = ic

        if not len(ics):
            if VERBOSE >= 1:
    PCR = args.PCR

    samples = lssp()
    if pnames is not None:
        samples = samples.loc[samples.patient.isin(pnames)]
    elif samplenames is not None:
        samples = samples.loc[samples.index.isin(samplenames)]

    if VERBOSE >= 2:
        print 'samples', samples.index.tolist()

    for samplename, sample in samples.iterrows():
        if VERBOSE >= 1:
            print samplename

        sample = SamplePat(sample)
        pname = sample.patient
        conss_genomewide = SeqIO.read(get_initial_reference_filename(pname, 'genomewide'), 'fasta')

        # Collect the allele counts (where possible)
        acs = []
        for fragment in ['F'+str(i) for i in xrange(1, 7)]:
            try:
                ref = ''.join(SeqIO.read(get_initial_reference_filename(pname, fragment), 'fasta'))
                ac = sample.get_allele_counts(fragment, merge_read_types=False)
                acs.append((fragment, ref, ac))
            except IOError:
                continue

        if not len(acs):
            if VERBOSE >= 1:
    PCR = args.PCR

    samples = lssp()
    if pnames is not None:
        samples = samples.loc[samples.patient.isin(pnames)]
    elif samplenames is not None:
        samples = samples.loc[samples.index.isin(samplenames)]

    if VERBOSE >= 2:
        print 'samples', samples.index.tolist()

    for samplename, sample in samples.iterrows():
        if VERBOSE >= 1:
            print samplename

        sample = SamplePat(sample)
        pname = sample.patient
        conss_genomewide = SeqIO.read(
            get_initial_reference_filename(pname, 'genomewide'), 'fasta')

        # Collect the allele counts (where possible)
        acs = []
        for fragment in ['F' + str(i) for i in xrange(1, 7)]:
            try:
                ref = ''.join(
                    SeqIO.read(get_initial_reference_filename(pname, fragment),
                               'fasta'))
                ac = sample.get_allele_counts(fragment, merge_read_types=False)
                acs.append((fragment, ref, ac))
            except IOError:
                continue
    PCR = args.PCR

    samples = lssp()
    if pnames is not None:
        samples = samples.loc[samples.patient.isin(pnames)]
    elif samplenames is not None:
        samples = samples.loc[samples.index.isin(samplenames)]

    if VERBOSE >= 2:
        print 'samples', samples.index.tolist()

    for samplename, sample in samples.iterrows():
        if VERBOSE >= 1:
            print samplename

        sample = SamplePat(sample)
        pname = sample.patient
        ref = sample.get_reference('genomewide', 'gb')

        # Collect the insertions (where possible)
        ics = {}
        for fragment in ['F' + str(i) for i in xrange(1, 7)]:
            try:
                ic = sample.get_insertions(fragment, merge_read_types=False)
            except IOError:
                continue
            start = find_annotation(ref, fragment).location.nofuzzy_start
            ics[(fragment, start)] = ic

        if not len(ics):
            if VERBOSE >= 1:
示例#22
0
    counts_all = []
    for protein in proteins:
        counts = []
        for samplename, sample in samples.iterrows():
            if submit:
                fork_self(samplename,
                          protein,
                          VERBOSE=VERBOSE,
                          qual_min=qual_min)
                continue

            if VERBOSE >= 1:
                print protein, samplename

            sample = SamplePat(sample)

            # NOTE: How do we find what fragment covers the protein? Well, a
            # protein can happily cross fragments. Since each
            # codon is independent, we should iterate over codons. We do not
            # do that for efficiency reasons. Instead, we identify all potential
            # fragments and split the protein into full codon chunks covered by
            # a single fragment.
            fragment_rois = sample.get_fragments_covered(
                protein, include_coordinates=True)

            refseq = sample.get_reference(protein)
            fn_out = sample.get_allele_counts_filename(protein,
                                                       PCR=PCR,
                                                       qual_min=qual_min,
                                                       type='aa')
示例#23
0
        print 'Alignments'
        copy_folder(patient, pat_fn, 'alignments')


        print 'Trees'
        copy_folder(patient, pat_fn, 'trees')


        print 'Haplotypes'
        copy_folder(patient, pat_fn, 'haplotypes')


        print 'Samples'
        for samplename, sample in patient.samples.iterrows():
            print samplename
            sample = SamplePat(sample)

            print 'Make folder'
            sm_fn = pat_fn+samplename+os.sep
            if not strip_PCR1:
                sm_fn += 'PCR1'+os.sep
            mkdirs(sm_fn)


            print 'Consensus'
            copy_glob(sample, sm_fn, 'consensus')
            

            print 'Allele counts'
            copy_glob(sample, sm_fn, 'allele_counts')
            
示例#24
0
 def initial_sample(self):
     '''Return the first row of self.samples as a SamplePat.

     The original docstring describes this sample as the one used as a
     mapping reference. Only the positional lookup (row 0) is visible
     here, so an empty table raises whatever self.samples.iloc[0] raises.
     '''
     from .samples import SamplePat

     first_row = self.samples.iloc[0]
     return SamplePat(first_row)
    n_pairs = args.maxreads
    skip_hash = args.skiphash
    summary = args.summary
    only_chunks = args.chunks
    filtered = args.filtered
    use_contaminated = args.include_contaminated

    # Collect all sequenced samples from patients
    samples_pat = lssp()
    if pnames is not None:
        samples_seq = []
        for pname in pnames:
            patient = load_patient(pname)
            patient.discard_nonsequenced_samples()
            for samplename_pat, sample_pat in patient.samples.iterrows():
                sample_pat = SamplePat(sample_pat)
                samples_seq.append(sample_pat.samples_seq)
        samples_seq = pd.concat(samples_seq)

    else:
        samples_seq = lss()
        ind = samples_pat.index.isin(samplenames)
        if ind.sum():
            samplenames_pat = samples_pat.index[ind]
            samples_seq = samples_seq.loc[samples_seq['patient sample'].isin(samplenames_pat)]
        else:
            samples_seq = samples_seq.loc[samples_seq.index.isin(samplenames)]

    if VERBOSE >= 2:
        print 'samples', samples_seq.index.tolist()
        
示例#26
0
    fragments = args.fragments
    submit = args.submit
    VERBOSE = args.verbose
    n_pairs = args.maxreads
    summary = args.summary
    PCR = args.PCR

    # Collect all sequenced samples from patients
    samples_pat = lssp()
    if pnames is not None:
        samples_seq = []
        for pname in pnames:
            patient = load_patient(pname)
            patient.discard_nonsequenced_samples()
            for samplename_pat, sample_pat in patient.samples.iterrows():
                sample_pat = SamplePat(sample_pat)
                samples_seq.append(sample_pat.samples_seq)
        samples_seq = pd.concat(samples_seq)

    elif samplenames is not None:
        samples_seq = lss()
        ind = samples_pat.index.isin(samplenames)
        samplenames_pat = samples_pat.index[ind]
        samples_seq = samples_seq.loc[samples_seq['patient sample'].isin(
            samplenames_pat)]

    else:
        samples_seq = lss()
        samples_seq = samples_seq.loc[samples_seq['patient sample'].isin(
            samples_pat.index)]
    args = parser.parse_args()
    pnames = args.patients
    samplenames = args.samples
    VERBOSE = args.verbose
    use_save = args.save

    fragments = ['F'+str(i+1) for i in xrange(6)]

    samples = load_samples_sequenced()
    if pnames is not None:
        samples = samples.loc[samples.patient.isin(pnames)]
    elif samplenames is not None:
        samples = samples.loc[samples.index.isin(samplenames)]

    for samplename, sample in samples.iterrows():
        sample = SamplePat(sample)
        if VERBOSE >= 1:
            print samplename

        dist_hists = []
        samples_seq = sample.get_sequenced_samples()
        samples_seq = samples_seq.loc[samples_seq.PCR == 1]
        for samplename_seq, sample_seq in samples_seq.iterrows():
            sample_seq = SampleSeq(sample_seq)
            data_folder = sample_seq.seqrun_folder
            adaID = sample_seq.adapter

            for fragment in fragments:
                try:
                    dist_hist = get_distance_histogram(data_folder, adaID, fragment,
                                                       VERBOSE=VERBOSE)
示例#28
0
    if not fragments:
        fragments = ['F'+str(i) for i in xrange(1, 7)]
    if VERBOSE >= 3:
        print 'fragments', fragments

    for fragment in fragments:
        inses = []
        for samplename, sample in samples.iterrows():
            if submit:
                fork_self(samplename, fragment, VERBOSE=VERBOSE, qual_min=qual_min)
                continue

            if VERBOSE >= 1:
                print fragment, samplename

            sample = SamplePat(sample)
            pname = sample.patient
            refseq = SeqIO.read(get_initial_reference_filename(pname, fragment), 'fasta')

            fn = sample.get_mapped_filtered_filename(fragment, PCR=PCR)
            if not os.path.isfile(fn):
                warn('No BAM file found', NoDataWarning)
                continue

            _, inse = gac(fn, len(refseq), qual_min=qual_min, VERBOSE=VERBOSE)
            inses.append(inse)

            if save_to_file:
                fn_out = sample.get_insertions_filename(fragment, PCR=PCR,
                                                        qual_min=qual_min)
                save_insertions(fn_out, inse)
    VERBOSE = args.verbose
    repn = args.repnumber
    samplename = args.sample

    patient = load_patient(pname)
    patient.discard_nonsequenced_samples()

    mkdirs(get_initial_reference_foldername(pname))

    if not fragments:
        fragments = ['F' + str(i) for i in xrange(1, 7)]
    if VERBOSE >= 3:
        print 'fragments', fragments

    if samplename is None:
        sample = SamplePat(patient.samples.iloc[samplen])
    else:
        sample = load_sample_sequenced(samplename)

    for fragment in fragments:
        sample_seq = SampleSeq(sample.samples_seq.iloc[repn])

        seq_run = sample_seq['seq run']
        adaID = sample_seq['adapter']
        dataset = sample_seq.sequencing_run
        data_folder = dataset.folder

        if VERBOSE:
            print 'Initial sample:', sample_seq.name, sample_seq['seq run'],
            print sample_seq.adapter