Пример #1
0
def make_output_folders(data_folder, adaIDs, VERBOSE=0):
    '''Make output folders for symlinking.

    Calls mkdirs on data_folder itself and then on one subfolder per adapter.

    Parameters:
        data_folder: path prefix; subfolder names are appended to it as-is
            (no separator is inserted).
        adaIDs: iterable of adapter IDs. The extra ID -1 is always processed
            after them.
        VERBOSE: verbosity level; every folder is reported when >= 1.
    '''
    from hivwholeseq.utils.generic import mkdirs

    mkdirs(data_folder)
    if VERBOSE >= 1:
        # A single pre-formatted argument prints the same text under the
        # Python 2 print statement and the Python 3 print function
        print('Folder created: %s' % (data_folder,))

    # list() lets callers pass a tuple or any other iterable, not only a list
    for adaID in list(adaIDs) + [-1]:
        dirname = data_folder + foldername_adapter(adaID)
        mkdirs(dirname)
        if VERBOSE >= 1:
            print('Folder created: %s' % (dirname,))
Пример #2
0
def make_output_folders(data_folder,
                        adapters_designed,
                        VERBOSE=0,
                        summary=True):
    '''Make output folders for all adapters and unclassified (e.g. PhiX).

    Parameters:
        data_folder: path prefix; folder names are appended to it as-is.
        adapters_designed: iterable of 2-item entries whose first item is the
            adapter ID; the second item is not used here.
        VERBOSE: if truthy, report every folder.
        summary: if true, append a note to the demultiplex summary file of
            data_folder.
    '''
    from hivwholeseq.utils.generic import mkdirs

    # Make folders for the samples
    for (adaID, _) in adapters_designed:
        dirname = foldername_adapter(adaID)
        mkdirs(data_folder + dirname)
        if VERBOSE:
            # A single pre-formatted argument prints the same text under the
            # Python 2 print statement and the Python 3 print function
            print('Folder created: %s' % (dirname,))

    # Make a default directory for unclassified reads
    mkdirs(data_folder + 'unclassified_reads')
    if VERBOSE:
        print('Folder created: unclassified reads')

    if summary:
        # Opened in append mode: earlier content of the summary is kept
        with open(get_demultiplex_summary_filename(data_folder), 'a') as f:
            f.write('\n'
                    'Folders created for samples and unclassified reads (including phix).'
                    '\n')
Пример #3
0
def get_reference_premap_hash_filename(data_folder, adaID, ext=True):
    '''Path of the stampy hash of the reference used for premapping.

    The file sits in the 'premapped/' subfolder of the adapter folder. With
    ext=False the '.sthash' extension is left off.
    '''
    extension = '.sthash' if ext else ''
    basename = 'reference' + extension
    return data_folder + foldername_adapter(adaID) + 'premapped/' + basename
Пример #4
0
def make_output_folders(data_folder, adaID, VERBOSE=0):
    '''Make the 'map_iter/' output folder of one adapter for the script.

    Parameters:
        data_folder: path prefix, concatenated as-is with the adapter folder.
        adaID: adapter ID, turned into a folder name by foldername_adapter.
        VERBOSE: if truthy, report the folder.
    '''
    from hivwholeseq.utils.generic import mkdirs

    dirname = data_folder + foldername_adapter(adaID) + 'map_iter/'
    mkdirs(dirname)
    if VERBOSE:
        # A single pre-formatted argument prints the same text under the
        # Python 2 print statement and the Python 3 print function
        print('Folder created: %s' % (dirname,))
Пример #5
0
def get_reference_all_filename(data_folder, adaID, fragment, ext=True):
    '''Path of the file with the cumulated consensi of a fragment.

    The file sits in the 'map_iter/' subfolder of the adapter folder; the
    '.fasta' extension is appended only when ext is true.
    '''
    stem = '_'.join(('consensus', 'alliters', fragment))
    path = data_folder + foldername_adapter(adaID) + 'map_iter/' + stem
    return path + '.fasta' if ext else path
Пример #6
0
def get_mapped_filename(data_folder, adaID=None, fragment=None, type='bam',
                        bwa=False, filtered=False, sort=False, part=None, unsorted=False,
                        rescue=False, trashed=False):
    '''Path of the reads mapped onto the consensus of a fragment.

    The basename is the fragment followed by optional tags, inside 'mapped/'.
    The adapter folder is prepended only when adaID is given.

    Raises:
        ValueError: if no fragment is selected.
    '''
    if fragment is None:
        raise ValueError('Select a fragment')

    # Independent tags, always appended in this order
    tags = [tag for (enabled, tag) in ((rescue, '_rescue'),
                                       (bwa, '_bwa'),
                                       (filtered, '_filtered'),
                                       (trashed, '_trashed')) if enabled]

    # At most one of these is appended: sort wins over part, part over unsorted
    if sort:
        tags.append('_sorted')
    elif part is not None:
        tags.append('_part' + str(part))
    elif unsorted:
        tags.append('_unsorted')

    relpath = 'mapped/' + fragment + ''.join(tags) + '.' + type
    if adaID is not None:
        relpath = foldername_adapter(adaID) + relpath
    return data_folder + relpath
Пример #7
0
def get_reference_premap_hash_filename(data_folder, adaID, ext=True):
    '''Path of the stampy hash of the reference used for premapping.

    The file sits in the 'premapped/' subfolder of the adapter folder. With
    ext=False the '.sthash' extension is left off.
    '''
    extension = '.sthash' if ext else ''
    basename = 'reference' + extension
    return data_folder + foldername_adapter(adaID) + 'premapped/' + basename
Пример #8
0
def get_figure_folder(data_folder, adaID=None):
    '''Folder holding the figures, optionally inside one adapter folder.

    Without adaID the result is 'figures/' directly under data_folder.
    '''
    if adaID is None:
        return data_folder + 'figures/'
    return data_folder + (foldername_adapter(adaID) + 'figures/')
def make_output_folders(data_folder, adaID, VERBOSE=0):
    '''Make the 'map_iter/' output folder of one adapter for the script.

    Parameters:
        data_folder: path prefix, concatenated as-is with the adapter folder.
        adaID: adapter ID, turned into a folder name by foldername_adapter.
        VERBOSE: if truthy, report the folder.
    '''
    from hivwholeseq.utils.generic import mkdirs

    dirname = data_folder + foldername_adapter(adaID) + 'map_iter/'
    mkdirs(dirname)
    if VERBOSE:
        # A single pre-formatted argument prints the same text under the
        # Python 2 print statement and the Python 3 print function
        print('Folder created: %s' % (dirname,))
Пример #10
0
def get_mapped_filename(data_folder,
                        adaID=None,
                        fragment=None,
                        type='bam',
                        bwa=False,
                        filtered=False,
                        sort=False,
                        part=None,
                        unsorted=False,
                        rescue=False,
                        trashed=False):
    '''Path of the reads mapped onto the consensus of a fragment.

    The basename is the fragment followed by optional tags, inside 'mapped/'.
    The adapter folder is prepended only when adaID is given.

    Raises:
        ValueError: if no fragment is selected.
    '''
    if fragment is None:
        raise ValueError('Select a fragment')

    # Independent tags, always appended in this order
    tags = [tag for (enabled, tag) in ((rescue, '_rescue'),
                                       (bwa, '_bwa'),
                                       (filtered, '_filtered'),
                                       (trashed, '_trashed')) if enabled]

    # At most one of these is appended: sort wins over part, part over unsorted
    if sort:
        tags.append('_sorted')
    elif part is not None:
        tags.append('_part' + str(part))
    elif unsorted:
        tags.append('_unsorted')

    relpath = 'mapped/' + fragment + ''.join(tags) + '.' + type
    if adaID is not None:
        relpath = foldername_adapter(adaID) + relpath
    return data_folder + relpath
def get_reference_all_filename(data_folder, adaID, fragment, ext=True):
    '''Path of the file with the cumulated consensi of a fragment.

    The file sits in the 'map_iter/' subfolder of the adapter folder; the
    '.fasta' extension is appended only when ext is true.
    '''
    stem = '_'.join(('consensus', 'alliters', fragment))
    path = data_folder + foldername_adapter(adaID) + 'map_iter/' + stem
    return path + '.fasta' if ext else path
Пример #12
0
def get_premapped_filename(data_folder,
                           adaID=None,
                           type='bam',
                           bwa=False,
                           part=None,
                           unsorted=False):
    '''Path of the reads mapped to the reference, to be split into fragments.

    The file is 'premapped/premapped' plus optional tags and the extension.
    The adapter folder is prepended only when adaID is given.

    Raises:
        ValueError: if type is neither 'sam' nor 'bam'.
    '''
    stem = 'premapped/premapped'
    if adaID is not None:
        stem = foldername_adapter(adaID) + stem

    # part takes precedence over unsorted; only one of the two tags is used
    if part is not None:
        stem = stem + '_part' + str(part)
    elif unsorted:
        stem = stem + '_unsorted'
    if bwa:
        stem = stem + '_bwa'

    if type == 'sam':
        extension = '.sam'
    elif type == 'bam':
        extension = '.bam'
    else:
        raise ValueError('Type of mapped reads file not recognized')

    return data_folder + (stem + extension)
Пример #13
0
def get_figure_folder(data_folder, adaID=None):
    '''Folder holding the figures, optionally inside one adapter folder.

    Without adaID the result is 'figures/' directly under data_folder.
    '''
    if adaID is None:
        return data_folder + 'figures/'
    return data_folder + (foldername_adapter(adaID) + 'figures/')
Пример #14
0
def get_reference_premap_filename(data_folder, adaID, fragment=None):
    '''Path of the FASTA reference used for premapping.

    The basename is 'reference.fasta', or 'reference_<fragment>.fasta' when
    a fragment is given; it sits in 'premapped/' inside the adapter folder.
    '''
    pieces = ['reference']
    if fragment is not None:
        pieces.append(fragment)
    basename = '_'.join(pieces) + '.fasta'
    return data_folder + foldername_adapter(adaID) + 'premapped/' + basename
Пример #15
0
def get_consensus_old_filename(data_folder, adaID, fragment, trim_primers=True):
    '''Path of the 'consensus_old' FASTA file of a fragment.

    '_with_primers' is inserted before the extension when trim_primers is
    false.
    '''
    primer_tag = '' if trim_primers else '_with_primers'
    basename = 'consensus_old_' + fragment + primer_tag + '.fasta'
    return data_folder + (foldername_adapter(adaID) + basename)
Пример #16
0
def get_hash_file(data_folder, adaID, fragment, ext=True):
    '''Path of the index file of a fragment consensus, with or w/o extension.

    The file sits in the 'hash/' subfolder of the adapter folder; '.sthash'
    is appended only when ext is true.
    '''
    relpath = foldername_adapter(adaID) + ('hash/consensus_' + fragment)
    if not ext:
        return data_folder + relpath
    return data_folder + (relpath + '.sthash')
Пример #17
0
def get_merged_consensus_filename(data_folder, adaID=None,
                                  fragments=('F1', 'F2', 'F3', 'F4', 'F5', 'F6')):
    '''Get the merged consensus of several fragments.

    Parameters:
        data_folder: path prefix, concatenated as-is.
        adaID: adapter ID; its folder is prepended only when not None.
        fragments: iterable of fragment names, joined with '-' in the
            basename. The default is a tuple so that no mutable object is
            shared between calls.

    Returns:
        The path 'consensus_<frag1>-<frag2>-....fasta' under data_folder.
    '''
    filename = 'consensus_' + '-'.join(fragments) + '.fasta'
    if adaID is not None:
        filename = foldername_adapter(adaID) + filename
    return data_folder + filename
Пример #18
0
def get_hash_file(data_folder, adaID, fragment, ext=True):
    '''Path of the index file of a fragment consensus, with or w/o extension.

    The file sits in the 'hash/' subfolder of the adapter folder; '.sthash'
    is appended only when ext is true.
    '''
    relpath = foldername_adapter(adaID) + ('hash/consensus_' + fragment)
    if not ext:
        return data_folder + relpath
    return data_folder + (relpath + '.sthash')
Пример #19
0
def get_reference_premap_filename(data_folder, adaID, fragment=None):
    '''Path of the FASTA reference used for premapping.

    The basename is 'reference.fasta', or 'reference_<fragment>.fasta' when
    a fragment is given; it sits in 'premapped/' inside the adapter folder.
    '''
    pieces = ['reference']
    if fragment is not None:
        pieces.append(fragment)
    basename = '_'.join(pieces) + '.fasta'
    return data_folder + foldername_adapter(adaID) + 'premapped/' + basename
Пример #20
0
def get_build_consensus_summary_filename(data_folder, adaID, fragment='general',
                                         iterative=True):
    '''Path of the text summary of the consensus building.

    With iterative=True the file is placed in the 'map_iter/' subfolder of
    the adapter folder, otherwise directly in the adapter folder.
    '''
    subfolder = 'map_iter/' if iterative else ''
    basename = 'summary_build_consensus_' + fragment + '.txt'
    return data_folder + foldername_adapter(adaID) + (subfolder + basename)
Пример #21
0
def get_mapped_filename(data_folder, adaID, fragment, n_iter, type='bam'):
    '''Path of the reads mapped at iteration n_iter, inside 'map_iter/'.

    The first iteration maps to 'reference'; iteration n maps to the
    consensus numbered n - 1.
    '''
    if n_iter == 1:
        target = 'reference'
    else:
        target = 'consensus_' + str(n_iter - 1)
    basename = 'mapped_to_' + target + '_' + fragment + '.' + type
    return data_folder + foldername_adapter(adaID) + 'map_iter/' + basename
Пример #22
0
def get_map_summary_filename(data_folder, adaID, fragment, rescue=False):
    '''Path of the text summary of the mapping of a fragment.

    The file sits in the 'mapped/' subfolder of the adapter folder.
    '''
    rescue_tag = '_rescue' if rescue else ''
    # NOTE: no separator is inserted between 'summary_map' and the fragment
    basename = 'summary_map' + fragment + rescue_tag + '.txt'
    return data_folder + foldername_adapter(adaID) + ('mapped/' + basename)
def get_mapped_filename(data_folder, adaID, fragment, n_iter, type='bam'):
    '''Path of the reads mapped at iteration n_iter, inside 'map_iter/'.

    The first iteration maps to 'reference'; iteration n maps to the
    consensus numbered n - 1.
    '''
    if n_iter == 1:
        target = 'reference'
    else:
        target = 'consensus_' + str(n_iter - 1)
    basename = 'mapped_to_' + target + '_' + fragment + '.' + type
    return data_folder + foldername_adapter(adaID) + 'map_iter/' + basename
Пример #24
0
def get_map_summary_filename(data_folder, adaID, fragment, rescue=False):
    '''Path of the text summary of the mapping of a fragment.

    The file sits in the 'mapped/' subfolder of the adapter folder.
    '''
    rescue_tag = '_rescue' if rescue else ''
    # NOTE: no separator is inserted between 'summary_map' and the fragment
    basename = 'summary_map' + fragment + rescue_tag + '.txt'
    return data_folder + foldername_adapter(adaID) + ('mapped/' + basename)
Пример #25
0
    def __init__(self, *args, **kwargs):
        '''Initialize a sequenced sample and store its folders.

        All arguments are forwarded to the parent initializer. Afterwards the
        entries 'folder' (sequencing run folder plus adapter folder) and
        'seqrun_folder' are set from the 'seq run' and 'adapter' entries.
        '''
        super(SampleSeq, self).__init__(*args, **kwargs)

        from hivwholeseq.sequencing.filenames import get_seqrun_foldername
        from hivwholeseq.sequencing.adapter_info import foldername_adapter

        run_name = self.loc['seq run']
        adapter = self.loc['adapter']

        sample_folder = get_seqrun_foldername(run_name) + foldername_adapter(adapter)
        self['folder'] = str(sample_folder)

        run_folder = get_seqrun_foldername(run_name)
        self['seqrun_folder'] = str(run_folder)
Пример #26
0
def get_build_consensus_summary_filename(data_folder,
                                         adaID,
                                         fragment='general',
                                         iterative=True):
    '''Path of the text summary of the consensus building.

    With iterative=True the file is placed in the 'map_iter/' subfolder of
    the adapter folder, otherwise directly in the adapter folder.
    '''
    subfolder = 'map_iter/' if iterative else ''
    basename = 'summary_build_consensus_' + fragment + '.txt'
    return data_folder + foldername_adapter(adaID) + (subfolder + basename)
Пример #27
0
def get_merged_allele_frequencies_filename(data_folder,
                                           adaID,
                                           fragments=('F1', 'F2', 'F3', 'F4',
                                                      'F5', 'F6')):
    '''Get the merged allele frequencies of several fragments.

    Parameters:
        data_folder: path prefix, concatenated as-is.
        adaID: adapter ID, turned into a folder name by foldername_adapter.
        fragments: iterable of fragment names, joined with '-' in the
            basename. The default is a tuple so that no mutable object is
            shared between calls.

    Returns:
        The path 'allele_frequencies_<frag1>-<frag2>-....fasta' inside the
        adapter folder.
    '''
    filename = 'allele_frequencies_' + '-'.join(fragments) + '.fasta'
    return data_folder + foldername_adapter(adaID) + filename
Пример #28
0
    def __init__(self, *args, **kwargs):
        '''Initialize a sequenced sample and store its folders.

        All arguments are forwarded to the parent initializer. Afterwards the
        entries 'folder' (sequencing run folder plus adapter folder) and
        'seqrun_folder' are set from the 'seq run' and 'adapter' entries.
        '''
        super(SampleSeq, self).__init__(*args, **kwargs)

        from hivwholeseq.sequencing.filenames import get_seqrun_foldername
        from hivwholeseq.sequencing.adapter_info import foldername_adapter

        run_name = self.loc['seq run']
        adapter = self.loc['adapter']

        sample_folder = get_seqrun_foldername(run_name) + foldername_adapter(adapter)
        self['folder'] = str(sample_folder)

        run_folder = get_seqrun_foldername(run_name)
        self['seqrun_folder'] = str(run_folder)
Пример #29
0
def get_consensus_old_filename(data_folder,
                               adaID,
                               fragment,
                               trim_primers=True):
    '''Path of the 'consensus_old' FASTA file of a fragment.

    '_with_primers' is inserted before the extension when trim_primers is
    false.
    '''
    primer_tag = '' if trim_primers else '_with_primers'
    basename = 'consensus_old_' + fragment + primer_tag + '.fasta'
    return data_folder + (foldername_adapter(adaID) + basename)
Пример #30
0
def get_merged_consensus_filename(data_folder,
                                  adaID=None,
                                  fragments=('F1', 'F2', 'F3', 'F4', 'F5',
                                             'F6')):
    '''Get the merged consensus of several fragments.

    Parameters:
        data_folder: path prefix, concatenated as-is.
        adaID: adapter ID; its folder is prepended only when not None.
        fragments: iterable of fragment names, joined with '-' in the
            basename. The default is a tuple so that no mutable object is
            shared between calls.

    Returns:
        The path 'consensus_<frag1>-<frag2>-....fasta' under data_folder.
    '''
    filename = 'consensus_' + '-'.join(fragments) + '.fasta'
    if adaID is not None:
        filename = foldername_adapter(adaID) + filename
    return data_folder + filename
def get_reference_filename(data_folder, adaID, fragment, n_iter, ext=True):
    '''Path of the reference used for the intermediate mapping n_iter.

    The first iteration uses the premapping reference of the fragment; later
    iterations use 'consensus_<n_iter - 1>_<fragment>' inside 'map_iter/'.
    With ext=False the '.fasta' extension is left off.
    '''
    if n_iter != 1:
        stem = '_'.join(['consensus', str(n_iter - 1), fragment])
        path = data_folder + foldername_adapter(adaID) + 'map_iter/' + stem
        return path + '.fasta' if ext else path

    path = get_reference_premap_filename(data_folder, adaID, fragment)
    if ext:
        return path
    # Drop the trailing '.fasta' of the premapping reference path
    return path[:-len('.fasta')]
Пример #32
0
def get_divided_filename(data_folder, adaID=None, fragment=None, type='bam', chunk=None):
    '''Get the filename of the BAM files divided for a single fragment.

    Parameters:
        data_folder: path prefix, concatenated as-is.
        adaID: adapter ID; its folder is prepended only when not None.
        fragment: fragment name (required despite the None default).
        type: file extension, without the dot.
        chunk: if not None, '_chunk_<chunk>' is added before the extension.

    Returns:
        The path 'divided/divided_<fragment>[_chunk_<chunk>].<type>'.

    Raises:
        ValueError: if no fragment is selected.
    '''
    # Fail with a clear message instead of a str + None TypeError below
    if fragment is None:
        raise ValueError('Select a fragment')

    filename = 'divided/divided'
    if adaID is not None:
        filename = foldername_adapter(adaID) + filename
    filename = data_folder + filename + '_' + fragment
    if chunk is not None:
        filename = filename + '_chunk_' + str(chunk)
    return filename + '.' + type
Пример #33
0
def get_reference_filename(data_folder, adaID, fragment, n_iter, ext=True):
    '''Path of the reference used for the intermediate mapping n_iter.

    The first iteration uses the premapping reference of the fragment; later
    iterations use 'consensus_<n_iter - 1>_<fragment>' inside 'map_iter/'.
    With ext=False the '.fasta' extension is left off.
    '''
    if n_iter != 1:
        stem = '_'.join(['consensus', str(n_iter - 1), fragment])
        path = data_folder + foldername_adapter(adaID) + 'map_iter/' + stem
        return path + '.fasta' if ext else path

    path = get_reference_premap_filename(data_folder, adaID, fragment)
    if ext:
        return path
    # Drop the trailing '.fasta' of the premapping reference path
    return path[:-len('.fasta')]
Пример #34
0
def get_divided_filenames(data_folder, adaID=None, fragments=None, type='bam'):
    '''Paths of the files with the reads divided by fragment.

    One path per requested fragment, followed by the four extra categories
    'ambiguous', 'crossmapped', 'unmapped' and 'low_quality'.
    '''
    prefix = 'divided/divided'
    if adaID is not None:
        prefix = foldername_adapter(adaID) + prefix
    prefix = data_folder + prefix

    labels = list(fragments)
    labels.extend(['ambiguous', 'crossmapped', 'unmapped', 'low_quality'])
    return [prefix + '_' + label + '.' + type for label in labels]
Пример #35
0
def get_divided_filenames(data_folder, adaID=None, fragments=None, type='bam'):
    '''Paths of the files with the reads divided by fragment.

    One path per requested fragment, followed by the four extra categories
    'ambiguous', 'crossmapped', 'unmapped' and 'low_quality'.
    '''
    prefix = 'divided/divided'
    if adaID is not None:
        prefix = foldername_adapter(adaID) + prefix
    prefix = data_folder + prefix

    labels = list(fragments)
    labels.extend(['ambiguous', 'crossmapped', 'unmapped', 'low_quality'])
    return [prefix + '_' + label + '.' + type for label in labels]
Пример #36
0
def get_read_filenames(data_folder, adaID=None, fragment=None, suffix='',
                       gzip=False, trimmed=False):
    '''Paths of the two demultiplexed read files (read1 and read2).

    Each basename is 'read<N>[_trimmed]<suffix>.fastq[.gz]'. The adapter
    folder is prepended only when adaID is given. The fragment argument is
    accepted but not used.
    '''
    paths = []
    for read in ('read1', 'read2'):
        path = read
        if adaID is not None:
            path = foldername_adapter(adaID) + path
        path = data_folder + path
        path = path + ('_trimmed' if trimmed else '')
        path = path + suffix + '.fastq'
        path = path + ('.gz' if gzip else '')
        paths.append(path)
    return paths
Пример #37
0
def get_divided_filename(data_folder,
                         adaID=None,
                         fragment=None,
                         type='bam',
                         chunk=None):
    '''Get the filename of the BAM files divided for a single fragment.

    Parameters:
        data_folder: path prefix, concatenated as-is.
        adaID: adapter ID; its folder is prepended only when not None.
        fragment: fragment name (required despite the None default).
        type: file extension, without the dot.
        chunk: if not None, '_chunk_<chunk>' is added before the extension.

    Returns:
        The path 'divided/divided_<fragment>[_chunk_<chunk>].<type>'.

    Raises:
        ValueError: if no fragment is selected.
    '''
    # Fail with a clear message instead of a str + None TypeError below
    if fragment is None:
        raise ValueError('Select a fragment')

    filename = 'divided/divided'
    if adaID is not None:
        filename = foldername_adapter(adaID) + filename
    filename = data_folder + filename + '_' + fragment
    if chunk is not None:
        filename = filename + '_chunk_' + str(chunk)
    return filename + '.' + type
Пример #38
0
def get_read_filenames(data_folder,
                       adaID=None,
                       fragment=None,
                       suffix='',
                       gzip=False,
                       trimmed=False):
    '''Paths of the two demultiplexed read files (read1 and read2).

    Each basename is 'read<N>[_trimmed]<suffix>.fastq[.gz]'. The adapter
    folder is prepended only when adaID is given. The fragment argument is
    accepted but not used.
    '''
    paths = []
    for read in ('read1', 'read2'):
        path = read
        if adaID is not None:
            path = foldername_adapter(adaID) + path
        path = data_folder + path
        path = path + ('_trimmed' if trimmed else '')
        path = path + suffix + '.fastq'
        path = path + ('.gz' if gzip else '')
        paths.append(path)
    return paths
Пример #39
0
def make_output_folders(data_folder, adapters_designed, VERBOSE=0, summary=True):
    '''Make output folders for all adapters and unclassified (e.g. PhiX).

    Parameters:
        data_folder: path prefix; folder names are appended to it as-is.
        adapters_designed: iterable of 2-item entries whose first item is the
            adapter ID; the second item is not used here.
        VERBOSE: if truthy, report every folder.
        summary: if true, append a note to the demultiplex summary file of
            data_folder.
    '''
    from hivwholeseq.utils.generic import mkdirs

    # Make folders for the samples
    for (adaID, _) in adapters_designed:
        dirname = foldername_adapter(adaID)
        mkdirs(data_folder + dirname)
        if VERBOSE:
            # A single pre-formatted argument prints the same text under the
            # Python 2 print statement and the Python 3 print function
            print('Folder created: %s' % (dirname,))

    # Make a default directory for unclassified reads
    mkdirs(data_folder + 'unclassified_reads')
    if VERBOSE:
        print('Folder created: unclassified reads')

    if summary:
        # Opened in append mode: earlier content of the summary is kept
        with open(get_demultiplex_summary_filename(data_folder), 'a') as f:
            f.write('\n'
                    'Folders created for samples and unclassified reads (including phix).'
                    '\n')
Пример #40
0
def get_premapped_filename(data_folder, adaID=None, type='bam', bwa=False,
                           part=None, unsorted=False):
    '''Path of the reads mapped to the reference, to be split into fragments.

    The file is 'premapped/premapped' plus optional tags and the extension.
    The adapter folder is prepended only when adaID is given.

    Raises:
        ValueError: if type is neither 'sam' nor 'bam'.
    '''
    stem = 'premapped/premapped'
    if adaID is not None:
        stem = foldername_adapter(adaID) + stem

    # part takes precedence over unsorted; only one of the two tags is used
    if part is not None:
        stem = stem + '_part' + str(part)
    elif unsorted:
        stem = stem + '_unsorted'
    if bwa:
        stem = stem + '_bwa'

    if type == 'sam':
        extension = '.sam'
    elif type == 'bam':
        extension = '.bam'
    else:
        raise ValueError('Type of mapped reads file not recognized')

    return data_folder + (stem + extension)
Пример #41
0
def get_fragment_positions_filename(data_folder, adaID):
    '''Path of the file with the fragment positions in the premap reference.

    The file sits in the 'divided/' subfolder of the adapter folder.
    '''
    relpath = 'divided/fragment_positions_premapped.dat'
    return data_folder + foldername_adapter(adaID) + relpath
Пример #42
0
def get_mutations_file(data_folder, adaID, fragment):
    '''Path of the pickle file with the mutations for all reads of a fragment.

    The file sits directly in the adapter folder.
    '''
    basename = ''.join(['mutations_', fragment, '.pickle'])
    return data_folder + (foldername_adapter(adaID) + basename)
Пример #43
0
def get_filter_mapped_summary_filename(data_folder, adaID, fragment):
    '''Path of the text summary of the filtering of a fragment's mapped reads.

    The file sits in the 'mapped/' subfolder of the adapter folder.
    '''
    relpath = 'mapped/' + ('summary_filter_' + fragment + '.txt')
    return data_folder + foldername_adapter(adaID) + relpath
Пример #44
0
def get_filter_mapped_summary_filename(data_folder, adaID, fragment):
    '''Path of the text summary of the filtering of a fragment's mapped reads.

    The file sits in the 'mapped/' subfolder of the adapter folder.
    '''
    relpath = 'mapped/' + ('summary_filter_' + fragment + '.txt')
    return data_folder + foldername_adapter(adaID) + relpath
Пример #45
0
def get_fragment_positions_filename(data_folder, adaID):
    '''Path of the file with the fragment positions in the premap reference.

    The file sits in the 'divided/' subfolder of the adapter folder.
    '''
    relpath = 'divided/fragment_positions_premapped.dat'
    return data_folder + foldername_adapter(adaID) + relpath
Пример #46
0
def get_premap_summary_filename(data_folder, adaID):
    '''Path of the text summary of the premapping to the reference.

    The file sits in the 'premapped/' subfolder of the adapter folder.
    '''
    relpath = 'premapped/summary_premapped.txt'
    return data_folder + foldername_adapter(adaID) + relpath
Пример #47
0
def get_divide_summary_filename(data_folder, adaID):
    '''Path of the text summary of the division into fragments.

    The file sits in the 'divided/' subfolder of the adapter folder.
    '''
    relpath = 'divided/summary_divide.txt'
    return data_folder + foldername_adapter(adaID) + relpath
Пример #48
0
def get_premap_summary_filename(data_folder, adaID):
    '''Path of the text summary of the premapping to the reference.

    The file sits in the 'premapped/' subfolder of the adapter folder.
    '''
    relpath = 'premapped/summary_premapped.txt'
    return data_folder + foldername_adapter(adaID) + relpath
Пример #49
0
def get_trim_summary_filename(data_folder, adaID):
    '''Path of the text summary of the low-quality trimming.

    The file sits directly in the adapter folder.
    '''
    adapter_folder = data_folder + foldername_adapter(adaID)
    return adapter_folder + 'summary_trim.txt'
Пример #50
0
def get_divide_summary_filename(data_folder, adaID):
    '''Path of the text summary of the division into fragments.

    The file sits in the 'divided/' subfolder of the adapter folder.
    '''
    relpath = 'divided/summary_divide.txt'
    return data_folder + foldername_adapter(adaID) + relpath
Пример #51
0
def get_trim_summary_filename(data_folder, adaID):
    '''Path of the text summary of the low-quality trimming.

    The file sits directly in the adapter folder.
    '''
    adapter_folder = data_folder + foldername_adapter(adaID)
    return adapter_folder + 'summary_trim.txt'
Пример #52
0
def get_read_unpaired_filename(data_folder, adaID):
    '''Path of the FASTQ file with read pairs in which one read is low quality.

    The file sits directly in the adapter folder.
    '''
    relpath = foldername_adapter(adaID) + 'reads_unpaired.fastq'
    return data_folder + relpath
Пример #53
0
def get_mapped_suspicious_filename(data_folder, adaID, fragment, type='bam'):
    '''Path of the mapped reads with many mutations from the consensus.

    The file is '<fragment>_suspicious.<type>' in the 'mapped/' subfolder of
    the adapter folder.
    '''
    basename = fragment + '_suspicious.' + type
    mapped_folder = data_folder + foldername_adapter(adaID) + 'mapped/'
    return mapped_folder + basename
Пример #54
0
def get_merged_allele_frequencies_filename(data_folder, adaID,
                                    fragments=('F1', 'F2', 'F3', 'F4', 'F5', 'F6')):
    '''Get the merged allele frequencies of several fragments.

    Parameters:
        data_folder: path prefix, concatenated as-is.
        adaID: adapter ID, turned into a folder name by foldername_adapter.
        fragments: iterable of fragment names, joined with '-' in the
            basename. The default is a tuple so that no mutable object is
            shared between calls.

    Returns:
        The path 'allele_frequencies_<frag1>-<frag2>-....fasta' inside the
        adapter folder.
    '''
    filename = 'allele_frequencies_' + '-'.join(fragments) + '.fasta'
    return data_folder + foldername_adapter(adaID) + filename
Пример #55
0
def get_read_unpaired_filename(data_folder, adaID):
    '''Path of the FASTQ file with read pairs in which one read is low quality.

    The file sits directly in the adapter folder.
    '''
    relpath = foldername_adapter(adaID) + 'reads_unpaired.fastq'
    return data_folder + relpath
Пример #56
0
def get_allele_frequencies_filename(data_folder, adaID, fragment):
    '''Path of the .npy file with the corrected allele frequencies.

    The file sits directly in the adapter folder.
    '''
    basename = ''.join(['allele_frequencies_', fragment, '.npy'])
    return data_folder + (foldername_adapter(adaID) + basename)
Пример #57
0
def get_mapped_suspicious_filename(data_folder, adaID, fragment, type='bam'):
    '''Path of the mapped reads with many mutations from the consensus.

    The file is '<fragment>_suspicious.<type>' in the 'mapped/' subfolder of
    the adapter folder.
    '''
    basename = fragment + '_suspicious.' + type
    mapped_folder = data_folder + foldername_adapter(adaID) + 'mapped/'
    return mapped_folder + basename
Пример #58
0
def get_allele_frequencies_filename(data_folder, adaID, fragment):
    '''Path of the .npy file with the corrected allele frequencies.

    The file sits directly in the adapter folder.
    '''
    basename = ''.join(['allele_frequencies_', fragment, '.npy'])
    return data_folder + (foldername_adapter(adaID) + basename)
Пример #59
0
def get_mutations_file(data_folder, adaID, fragment):
    '''Path of the pickle file with the mutations for all reads of a fragment.

    The file sits directly in the adapter folder.
    '''
    basename = ''.join(['mutations_', fragment, '.pickle'])
    return data_folder + (foldername_adapter(adaID) + basename)