Exemplo n.º 1
0
def build_hash_and_pickle(ref_fn, key_length, force_rebuild=False):
    """Return the genome hash for ``ref_fn``, backed by an on-disk pickle cache.

    The cache file name is derived from ``ref_fn`` by dropping its extension
    and appending ``_hash_keylength_<key_length>.pkl``.  If that file exists
    and ``force_rebuild`` is false, the cached object is loaded and returned
    as long as its longest entry has length ``key_length``.  Otherwise the
    hash is rebuilt with ``read_reference`` and ``make_genome_hash``, written
    to the cache file, and returned.

    NOTE(review): the cached object is presumably a dict keyed by
    subsequences of the reference; this function only relies on it being an
    iterable of sized items -- confirm against ``make_genome_hash``.

    SECURITY: ``pickle.load`` can execute arbitrary code, so only point this
    at cache files that this program wrote itself.

    :param ref_fn: path to the reference genome file.
    :param key_length: key length the hash is built for and validated against.
    :param force_rebuild: when true, ignore any existing cache file.
    :return: the genome hash, either loaded from cache or freshly built.
    """
    reference_hash_pkl_fn = '{}_hash_keylength_{}.pkl'.format(
        splitext(ref_fn)[0], key_length)

    if exists(reference_hash_pkl_fn) and not force_rebuild:
        try:
            with open(reference_hash_pkl_fn, 'rb') as pkl_file:
                ref_genome_hash = pickle.load(pkl_file)
        except (EOFError, pickle.UnpicklingError):
            # A truncated or corrupt cache (e.g. from an interrupted dump)
            # is treated as a cache miss and rebuilt below.
            ref_genome_hash = None
        # An empty hash cannot be validated with max(); treat it as a miss
        # rather than letting max() raise ValueError.
        if ref_genome_hash and max(map(len, ref_genome_hash)) == key_length:
            return ref_genome_hash

    # Cache missing, stale, unusable, or a rebuild was requested.
    reference = read_reference(ref_fn)
    ref_genome_hash = make_genome_hash(reference, key_length)
    with open(reference_hash_pkl_fn, 'wb') as pkl_file:
        pickle.dump(ref_genome_hash, pkl_file)
    return ref_genome_hash
                        help='File containing a reference genome.')
    parser.add_argument('-r',
                        '--reads',
                        required=True,
                        dest='reads_file',
                        help='File containg sequencing reads.')
    parser.add_argument('-o',
                        '--outputFile',
                        required=True,
                        dest='output_file',
                        help='Output file name.')
    args = parser.parse_args()
    reference_fn = args.reference_file
    reads_fn = args.reads_file
    output_fn = args.output_file

    input_reads = read_reads(reads_fn)
    # Aligning every read will take a while; to generate some data quickly,
    # you can align only a slice of the reads, for example:
    #
    #   input_reads = input_reads[:300]
    #

    reference = read_reference(reference_fn)
    alignments, reads = trivial_algorithm(input_reads, reference)

    output_str = pretty_print_aligned_reads_with_ref(reads, alignments,
                                                     reference)
    with (open(output_fn, 'w')) as output_file:
        output_file.write(output_str)