Exemplo n.º 1
0
def read_sequences(filename, qualities=False, genbank_callback=None):
    """ Read fasta or illumina sequences, possibly compressed

        filename is split on '~~'. The first part is the file to read;
        every further part selects a post-reading filter: the lower-cased
        part is matched against the prefixes in FILTERS and the text after
        the prefix is handed to the filter as its argument.

        Valid values for qualities: False,True,'required'
          False      - if the input has qualities, the result is passed
                       through filter_no_qualities
          True       - the result is returned without that extra step
          'required' - grace.Error is raised if the input has no qualities

        genbank_callback is passed through to read_genbank_sequence and is
        only used for GenBank input.

        Raises grace.Error for an unrecognized file format, for an
        unrecognized filter, or when qualities are required but absent.
    """
    assert qualities in (False, True, 'required')

    parts = filename.split('~~')
    path = parts[0]

    info = get_file_info(path)

    have_qualities = False

    if 'type-empty' in info:
        # An empty input is flagged as having qualities, so that
        # qualities='required' does not reject it.
        have_qualities = True
        result = read_empty(path)
    elif 'type-fasta' in info:
        result = read_fasta(path)
    elif 'type-genbank' in info:
        result = read_genbank_sequence(path, genbank_callback)
    elif 'type-fastq' in info:
        have_qualities = True
        result = read_illumina_with_quality(path)
    elif 'type-gff' in info:
        result = read_gff3_sequence(path)
    elif 'type-sff' in info:
        # No file handle is opened by this function, so there is nothing
        # to close before handing the file to the external converter.
        grace.require_sff2fastq()
        have_qualities = True
        # The output of sff2fastq is read as fastq from the process's stdout.
        process = run(['sff2fastq', path])
        result = read_illumina_with_quality(process.stdout)
    else:
        raise grace.Error('Unrecognized file format for ' + filename)

    if qualities == 'required' and not have_qualities:
        raise grace.Error('Need base qualities in ' + filename)

    for part in parts[1:]:
        for prefix in FILTERS:
            if part.lower().startswith(prefix):
                result = FILTERS[prefix](result, part[len(prefix):])
                break
        else:
            # for/else: no prefix in FILTERS matched this part.
            raise grace.Error('Unrecognized filter: ' + part)

    if have_qualities and not qualities:
        result = filter_no_qualities(result)

    return result
Exemplo n.º 2
0
def read_sequences(filename, qualities=False, genbank_callback=None):
    """ Read fasta or illumina sequences, possibly compressed

        The filename may carry filter specifications separated by '~~':
        'path~~filterA...~~filterB...'. Only the first component is opened;
        each later component is lower-cased and matched against the prefixes
        in FILTERS, and the remainder after the prefix becomes the filter's
        argument. Filters are applied in the order given.

        Valid values for qualities: False,True,'required'
          False      - inputs flagged as having qualities are additionally
                       passed through filter_no_qualities
          True       - no extra quality handling
          'required' - raise grace.Error unless the input has qualities

        genbank_callback is forwarded to read_genbank_sequence (GenBank
        input only).

        Raises grace.Error if the file format or a filter is not
        recognized, or if required qualities are missing.
    """
    assert qualities in (False, True, 'required')

    parts = filename.split('~~')
    source = parts[0]
    filter_specs = parts[1:]

    info = get_file_info(source)

    have_qualities = False

    if 'type-empty' in info:
        # Empty input is treated as having qualities; this keeps
        # qualities='required' from failing on it.
        have_qualities = True
        result = read_empty(source)
    elif 'type-fasta' in info:
        result = read_fasta(source)
    elif 'type-genbank' in info:
        result = read_genbank_sequence(source, genbank_callback)
    elif 'type-fastq' in info:
        have_qualities = True
        result = read_illumina_with_quality(source)
    elif 'type-gff' in info:
        result = read_gff3_sequence(source)
    elif 'type-sff' in info:
        # This function never opens the file itself, so no handle needs
        # closing before the external sff2fastq tool is run on it.
        grace.require_sff2fastq()
        have_qualities = True
        process = run(['sff2fastq', source])
        # The converter's stdout is read as fastq.
        result = read_illumina_with_quality(process.stdout)
    else:
        raise grace.Error('Unrecognized file format for ' + filename)

    if qualities == 'required' and not have_qualities:
        raise grace.Error('Need base qualities in ' + filename)

    for spec in filter_specs:
        lowered = spec.lower()
        for prefix in FILTERS:
            if lowered.startswith(prefix):
                result = FILTERS[prefix](result, spec[len(prefix):])
                break
        else:
            # Reached only when no prefix matched (loop ended without break).
            raise grace.Error('Unrecognized filter: ' + spec)

    if have_qualities and not qualities:
        result = filter_no_qualities(result)

    return result
Exemplo n.º 3
0
def read_sequences(filename, qualities=False, genbank_callback=None):
    """ Read fasta or illumina sequences, possibly compressed

        filename is split on '~~'. The first part is the file to read;
        every further part selects a post-reading filter: the lower-cased
        part is matched against the prefixes in FILTERS and the text after
        the prefix is handed to the filter as its argument.

        The format is chosen from the first 8 characters read from the
        file: nothing at all (empty), '>' (fasta), 'LOCUS' (GenBank),
        '@' (fastq), '##gff' (GFF) or '.sff' (SFF, converted with the
        external sff2fastq tool).

        If the input has qualities (fastq or SFF) and qualities is false,
        the result is passed through filter_no_qualities.

        genbank_callback is passed through to read_genbank_sequence and is
        only used for GenBank input.

        Raises grace.Error for an unrecognized file format or filter.
    """

    parts = filename.split('~~')
    path = parts[0]

    # Peek at the start of the file to detect its format. The handle is
    # closed even if the read fails; each reader below reopens the file.
    f = open_possibly_compressed_file(path)
    try:
        peek = f.read(8)
    finally:
        f.close()

    have_qualities = False

    if not peek:
        result = read_empty(path)
    elif peek.startswith('>'):
        result = read_fasta(path)
    elif peek.startswith('LOCUS'):
        result = read_genbank_sequence(path, genbank_callback)
    elif peek.startswith('@'):
        have_qualities = True
        result = read_illumina_with_quality(path)
    elif peek.startswith('##gff'):
        result = read_gff3_sequence(path)
    elif peek.startswith('.sff'):
        # The peek handle is already closed above; nothing more to close.
        grace.require_sff2fastq()
        have_qualities = True
        # The output of sff2fastq is read as fastq from the process's stdout.
        process = run(['sff2fastq', path])
        result = read_illumina_with_quality(process.stdout)
    else:
        raise grace.Error('Unrecognized file format for ' + filename)

    for part in parts[1:]:
        for prefix in FILTERS:
            if part.lower().startswith(prefix):
                result = FILTERS[prefix](result, part[len(prefix):])
                break
        else:
            # for/else: no prefix in FILTERS matched this part.
            raise grace.Error('Unrecognized filter: ' + part)

    if have_qualities and not qualities:
        result = filter_no_qualities(result)

    return result
Exemplo n.º 4
0
def read_sequences(filename, qualities=False, genbank_callback=None):
    """ Read fasta or illumina sequences, possibly compressed

        The filename may carry filter specifications separated by '~~':
        'path~~filterA...~~filterB...'. Only the first component is opened;
        each later component is lower-cased and matched against the prefixes
        in FILTERS, and the remainder after the prefix becomes the filter's
        argument. Filters are applied in the order given.

        The reader is selected by sniffing the first 8 characters of the
        file:
          (nothing)  -> read_empty
          '>'        -> read_fasta
          'LOCUS'    -> read_genbank_sequence (receives genbank_callback)
          '@'        -> read_illumina_with_quality
          '##gff'    -> read_gff3_sequence
          '.sff'     -> sff2fastq, then read_illumina_with_quality

        When the input has qualities ('@' and '.sff' cases) and qualities
        is false, the result is passed through filter_no_qualities.

        Raises grace.Error if the file format or a filter is not recognized.
    """

    parts = filename.split('~~')
    source = parts[0]
    filter_specs = parts[1:]

    # Sniff the format. try/finally guarantees the handle is released
    # even when the read raises.
    handle = open_possibly_compressed_file(source)
    try:
        peek = handle.read(8)
    finally:
        handle.close()

    have_qualities = False

    if not peek:
        result = read_empty(source)
    elif peek.startswith('>'):
        result = read_fasta(source)
    elif peek.startswith('LOCUS'):
        result = read_genbank_sequence(source, genbank_callback)
    elif peek.startswith('@'):
        have_qualities = True
        result = read_illumina_with_quality(source)
    elif peek.startswith('##gff'):
        result = read_gff3_sequence(source)
    elif peek.startswith('.sff'):
        # The sniffing handle was closed above, so the converter can be
        # run on the file directly.
        grace.require_sff2fastq()
        have_qualities = True
        process = run(['sff2fastq', source])
        # The converter's stdout is read as fastq.
        result = read_illumina_with_quality(process.stdout)
    else:
        raise grace.Error('Unrecognized file format for '+filename)

    for spec in filter_specs:
        lowered = spec.lower()
        for prefix in FILTERS:
            if lowered.startswith(prefix):
                result = FILTERS[prefix](result, spec[len(prefix):])
                break
        else:
            # Reached only when no prefix matched (loop ended without break).
            raise grace.Error('Unrecognized filter: '+spec)

    if have_qualities and not qualities:
        result = filter_no_qualities(result)

    return result