示例#1
0
def align_and_pickle_mappings():
    """Align all paired reads of the configured dataset and pickle the result.

    Steps, in order:
      1. Load the reference hash via ``load_hash_pickle`` from the file
         ``ref_hash_<DATASET>_<KEY_SIZE>.pkl``.
      2. Read the reference and the reads from
         ``<DATA_PATH>/<DATASET>/<REF_FILE | READS_FILE>``.
      3. Call ``align_paired_read`` on every paired read and keep only the
         truthy results.
      4. Make sure ``<DATA_PATH>/<DATASET>/alignments/`` exists.
      5. Dump the kept alignments with ``pickle`` to
         ``alignments_<DATASET>_<KEY_SIZE>.pkl``.

    Progress and elapsed times are printed to stdout. Returns ``None``.

    NOTE(review): the pickle is written to a bare relative file name (i.e.
    resolved against the current working directory), not inside the
    ``alignments/`` folder created in step 4. The path is kept as-is because
    readers elsewhere may rely on it -- confirm whether that is intended.
    """
    # Single-argument print(...) behaves the same as the Python 2 print
    # statement and also parses on Python 3.
    print('loading ref_hash pickle...')
    pickle_filename = 'ref_hash_{}_{}.pkl'.format(c.DATASET, c.KEY_SIZE)
    ref_hash = load_hash_pickle(pickle_filename)

    print('loading ref...')
    ref = utils.read_reference('{}/{}/{}'.format(c.DATA_PATH, c.DATASET, c.REF_FILE))

    print('loading reads...')
    reads_file = c.READS_FILE
    reads = utils.read_reads('{}/{}/{}'.format(c.DATA_PATH, c.DATASET, reads_file))
    print('loaded reads')

    print('aligning {}...'.format(c.DATASET))
    start = time.time()
    alignments = [align_paired_read(pr, ref, ref_hash) for pr in reads]
    # Drop falsy results returned by align_paired_read.
    alignments = [x for x in alignments if x]
    print('alignment complete, elapsed: {}'.format(time.time() - start))

    directory = '{}/{}/{}'.format(c.DATA_PATH, c.DATASET, 'alignments/')
    if not os.path.exists(directory):
        print('creating folder {}'.format(directory))
        os.makedirs(directory)

    start = time.time()
    alignments_pickle_path = 'alignments_{}_{}.pkl'.format(c.DATASET, c.KEY_SIZE)
    # Use a context manager so the file is flushed and closed deterministically
    # instead of whenever the anonymous file object gets garbage collected.
    with open(alignments_pickle_path, 'wb') as pickle_file:
        pickle.dump(alignments, pickle_file)
    print('{} alignment pickled in {}'.format(reads_file, time.time() - start))
示例#2
0
def align_and_pickle_mappings_split(split_no, ref, ref_hash):
    """Align one split of the reads and pickle the resulting alignments.

    Reads ``<DATA_PATH>/<DATASET>/reads_split/part_<split_no>.txt``, calls
    ``align_paired_read`` on every paired read, keeps only the truthy
    results and dumps them with ``pickle`` into
    ``<DATA_PATH>/<DATASET>/alignments/`` (created if missing) as
    ``alignments_<DATASET>_<KEY_SIZE>_part_<split_no>.pkl``.

    Progress and elapsed times are printed to stdout.

    Args:
        split_no: Identifier of the split; substituted into the input file
            name, the output file name and the log messages.
        ref: Reference passed through unchanged to ``align_paired_read``.
        ref_hash: Reference hash passed through unchanged to
            ``align_paired_read``.

    Returns:
        None.
    """
    # Single-argument print(...) behaves the same as the Python 2 print
    # statement and also parses on Python 3.
    print('loading reads file {}...'.format(split_no))
    reads = utils.read_reads('{}/{}/reads_split/part_{}.txt'.format(c.DATA_PATH, c.DATASET, split_no))
    print('loaded reads file {}'.format(split_no))

    print('aligning {} part {}'.format(c.DATASET, split_no))
    start = time.time()
    alignments = [align_paired_read(pr, ref, ref_hash) for pr in reads]
    # Drop falsy results returned by align_paired_read.
    alignments = [x for x in alignments if x]
    print('alignment complete, elapsed: {}'.format(time.time() - start))

    directory = '{}/{}/{}'.format(c.DATA_PATH, c.DATASET, 'alignments/')
    if not os.path.exists(directory):
        print('creating folder {}'.format(directory))
        os.makedirs(directory)

    start = time.time()
    # os.path.join avoids the doubled separator that plain formatting would
    # produce (directory already ends with '/'); it resolves to the same file.
    alignments_pickle_path = os.path.join(
        directory,
        'alignments_{}_{}_part_{}.pkl'.format(c.DATASET, c.KEY_SIZE, split_no))
    # Use a context manager so the file is flushed and closed deterministically
    # instead of whenever the anonymous file object gets garbage collected.
    with open(alignments_pickle_path, 'wb') as pickle_file:
        pickle.dump(alignments, pickle_file)
    print('{} part {} alignment pickled in {}'.format(c.DATASET, split_no, time.time() - start))