Пример #1
0
def main():
    """Split the reads FASTA file into one reads file per backbone.

    Steps:
      1. Create ``options.output_dir`` if it does not exist.
      2. Split the backbone FASTA file with ``split_backbone``, which
         yields a backbone name -> id mapping.
      3. Build the read name -> backbone names mapping with
         ``build_reads_to_backbone_dict``.
      4. Append every read to ``<output_dir>/<id>.reads.fasta`` for each
         backbone listed for that read.

    Output files are opened in append mode, so files left over from an
    earlier run are extended rather than replaced.
    """
    (options, args) = setup_options()

    if not os.path.exists(options.output_dir):
        os.makedirs(options.output_dir)

    # Split backbone fasta file into chunks.
    backbone_to_id = split_backbone(options)

    # Find the reads that correspond to a given backbone.
    reads_to_backbone = build_reads_to_backbone_dict(options)

    # Split the reads based on their corresponding backbone file.
    pf = ParseFasta(options.reads_filename)
    record = pf.getRecord()
    while record is not None:
        # Iterating an empty list is a no-op, so no separate length check
        # is needed before the loop.
        for backbone in reads_to_backbone[record[0]]:
            backbone_id = backbone_to_id[backbone]
            reads_path = os.path.join(
                options.output_dir, str(backbone_id) + '.reads.fasta')

            # The context manager closes the handle even if write() raises.
            with open(reads_path, 'a') as reads_file:
                reads_file.write('>' + record[0] + '\n' + record[1] + '\n')

        record = pf.getRecord()
Пример #2
0
def split_backbone(options):
    """
    Split backbone fasta file into chunks.

    Every record read from ``options.backbone_filename`` is written to its
    own file ``<output_dir>/<prefix>-<n>.fasta``, where ``n`` counts up from
    0 in the order the records are read. Each record name is also printed
    to stdout as it is processed.

    Returns dictionary of backbone -> id, where the id is the string
    ``<prefix>-<n>`` used in the file name.
    """

    backbone_to_id = {}
    id_counter = 0

    # Write all backbone files to their own fasta file.
    pf = ParseFasta(options.backbone_filename)
    record = pf.getRecord()
    while record is not None:
        # Single-argument call form prints the same on Python 2 and 3.
        print(record[0])

        # Build the id once; it names both the file and the dict value.
        backbone_id = options.prefix + '-' + str(id_counter)
        chunk_path = options.output_dir + '/' + backbone_id + '.fasta'

        # The context manager closes the handle even if write() raises.
        with open(chunk_path, 'w') as chunk_file:
            chunk_file.write('>' + record[0] + '\n' + record[1])

        backbone_to_id[record[0]] = backbone_id

        id_counter += 1
        record = pf.getRecord()

    return backbone_to_id
def main():
    """Distribute reads into per-backbone FASTA files.

    Ensures ``options.output_dir`` exists, splits the backbone file with
    ``split_backbone`` (backbone name -> id), obtains the read name ->
    backbone names mapping from ``build_reads_to_backbone_dict``, and then
    appends each read to ``<output_dir>/<id>.reads.fasta`` once for every
    backbone associated with it.

    Files are opened in append mode: existing ``.reads.fasta`` files in the
    output directory are extended, not truncated.
    """
    (options, args) = setup_options()

    if not os.path.exists(options.output_dir):
        os.makedirs(options.output_dir)

    # Split backbone fasta file into chunks.
    backbone_to_id = split_backbone(options)

    # Find the reads that correspond to a given backbone.
    reads_to_backbone = build_reads_to_backbone_dict(options)

    # Split the reads based on their corresponding backbone file.
    pf = ParseFasta(options.reads_filename)
    record = pf.getRecord()
    while record is not None:
        read_name = record[0]

        # A read with no backbones yields an empty iteration, so the
        # explicit length test is unnecessary.
        for backbone in reads_to_backbone[read_name]:
            out_name = str(backbone_to_id[backbone]) + '.reads.fasta'
            out_path = os.path.join(options.output_dir, out_name)

            # `with` guarantees the handle is released on write failure.
            with open(out_path, 'a') as out_file:
                out_file.write('>' + read_name + '\n' + record[1] + '\n')

        record = pf.getRecord()
def split_backbone(options):
    """
    Split backbone fasta file into chunks.

    Reads records from ``options.backbone_filename`` and writes each one to
    ``<output_dir>/<prefix>-<n>.fasta`` (``n`` starts at 0 and increases by
    one per record). The record name is echoed to stdout for each record.

    Returns dictionary of backbone -> id, the id being ``<prefix>-<n>``.
    """

    backbone_to_id = {}
    id_counter = 0

    # Write all backbone files to their own fasta file.
    pf = ParseFasta(options.backbone_filename)
    record = pf.getRecord()
    while record is not None:
        name = record[0]
        # Call form with one argument behaves identically on Python 2 and 3.
        print(name)

        # Computed once and reused for the file name and the mapping.
        backbone_id = options.prefix + '-' + str(id_counter)

        # Context manager closes the file even when write() fails.
        with open(options.output_dir + '/' + backbone_id + '.fasta', 'w') as out_file:
            out_file.write('>' + name + '\n' + record[1])

        backbone_to_id[name] = backbone_id

        id_counter += 1
        record = pf.getRecord()

    return backbone_to_id
Пример #5
0
def build_readseqs_dict(options):
    """Return a dict mapping each record name to its sequence.

    Records are pulled from ``options.reads_filename`` through
    ``ParseFasta.getRecord()`` until it returns ``None``. If a name occurs
    more than once, the last sequence read wins.
    """
    # Typically 'ctg_pb.fasta' in the pipeline.
    parser = ParseFasta(options.reads_filename)
    seq_by_name = {}
    while True:
        record = parser.getRecord()
        if record is None:
            break
        seq_by_name[record[0]] = record[1]
    return seq_by_name
Пример #6
0
def main():
    """Split the reads FASTA file into per-backbone reads files.

    Creates ``options.output_dir`` when missing, splits the backbone file
    with ``split_backbone`` and builds the read -> backbones mapping with
    ``build_reads_to_backbone_dict``. Each read is then appended to
    ``<output_dir>/<id>.reads.fasta`` for every backbone it maps to.

    Files are opened and closed per write instead of being kept open.
    Because they are opened in append mode, output left behind by an
    earlier run is extended rather than overwritten.
    """
    (options, args) = setup_options()

    if not os.path.exists(options.output_dir):
        os.makedirs(options.output_dir)

    # Split backbone fasta file into chunks.
    # use: backbone_to_id[seqname] = fileprefix-number
    backbone_to_id = split_backbone(options)

    # Find the reads that correspond to a given backbone.
    # use: reads_to_backbone[read_or_contig_name].append(backbone_name)
    reads_to_backbone = build_reads_to_backbone_dict(options)

    # Open and close files as they are needed. A little slower than
    # keeping every handle open.
    pf = ParseFasta(options.reads_filename)
    record = pf.getRecord()
    while record is not None:
        # Looping over an empty list does nothing, so no length check.
        for backbone in reads_to_backbone[record[0]]:
            backbone_id = backbone_to_id[backbone]
            fname = options.output_dir + '/' + str(backbone_id) + '.reads.fasta'

            # Mode 'a' creates the file when it does not exist yet, so a
            # separate isfile() check with a 'w' branch is not needed.
            with open(fname, 'a') as f:
                f.write('>' + record[0] + '\n' + record[1] + '\n')

        record = pf.getRecord()
Пример #7
0
def main():
    """Split the reads FASTA file into per-backbone reads files.

    Creates ``options.output_dir`` when missing, splits the backbone file
    with ``split_backbone`` and builds the read -> backbones mapping with
    ``build_reads_to_backbone_dict``. Each read is written to
    ``<output_dir>/<id>.reads.fasta`` for every backbone it maps to.

    One file handle per backbone id is opened lazily in write mode (so an
    existing file is truncated on first use) and kept open until all reads
    are processed. All handles are closed at the end, including when an
    error interrupts processing.
    """
    (options, args) = setup_options()

    if not os.path.exists(options.output_dir):
        os.makedirs(options.output_dir)

    # Split backbone fasta file into chunks.
    backbone_to_id = split_backbone(options)

    # Find the reads that correspond to a given backbone.
    reads_to_backbone = build_reads_to_backbone_dict(options)

    # Split the reads based on their corresponding backbone file.
    # Maps backbone id -> open file handle for that backbone's reads.
    file_pointers_dict = {}

    pf = ParseFasta(options.reads_filename)
    try:
        record = pf.getRecord()
        while record is not None:
            # Looping over an empty list does nothing, so no length check.
            for backbone in reads_to_backbone[record[0]]:
                backbone_id = backbone_to_id[backbone]

                reads_fp = file_pointers_dict.get(backbone_id)
                if reads_fp is None:
                    # First read for this backbone: open its file once.
                    reads_fp = open(
                        options.output_dir + "/" + str(backbone_id) + ".reads.fasta",
                        "w")
                    file_pointers_dict[backbone_id] = reads_fp

                reads_fp.write(">" + record[0] + "\n" + record[1] + "\n")

            record = pf.getRecord()
    finally:
        # Close every handle so buffered data is flushed and descriptors
        # are released even when an exception interrupts the loop.
        for reads_fp in file_pointers_dict.values():
            reads_fp.close()